# Evaluation on modified environement

1. Modify the environment.
2. Load the checkpoint - [docs](https://docs.ray.io/en/latest/serve/tutorials/rllib.html),
    - load RAY config,
    - build modified env,
    - Load the trained model.
3. Evaluate each seed 5 times.

## Ray startup

In [None]:
import os

import ray
import ray.rllib.algorithms.ddpg as ddpg
import ray.rllib.algorithms.ppo as ppo
import ray.rllib.algorithms.sac as sac
import torch
from ray import serve

In [1]:
print(
    f">>> CUDA_VISIBLE_DEVICES environment variable: {os.environ['CUDA_VISIBLE_DEVICES']}"
)

# torch.cuda.is_available()
# GPU_ID = 0
# os.environ["CUDA_VISIBLE_DEVICES"] = str(GPU_ID)
# torch.cuda.set_device(GPU_ID)
print(
    f">>> That variable has been overwritten: {os.environ['CUDA_VISIBLE_DEVICES']}. Good luck, may AI be ever in your favour!"
)

>>> CUDA_VISIBLE_DEVICES environment variable: 0,1
>>> That variable has been overwritten: 0,1. Good luck, may AI be ever in your favour!


In [2]:
ray.shutdown()
ray.init(ignore_reinit_error=True, num_cpus=16, num_gpus=1, dashboard_host="0.0.0.0")

print(f">>>[torch] Version {torch.__version__}")
print(f">>>[torch] Avaiable GPUs: {torch.cuda.device_count()}")
print(f">>>[torch] Selected GPU (ID): {torch.cuda.current_device()}")
print(f">>>[ray] Version {ray.__version__}")
print(f">>>[ray] Allowed GPUs (IDs): {os.environ.get('CUDA_VISIBLE_DEVICES')}")
print(f">>>[ray] Ray resources: {ray.available_resources()}")
# print(f">>>[ray] Avaiable GPUs: {ray.get_gpu_ids()}")

print("Ray Dashboard URL: http://localhost:8265")

2023-05-19 12:17:56,225	INFO worker.py:1519 -- Started a local Ray instance. View the dashboard at [1m[32m149.156.105.73:8266 [39m[22m


>>>[torch] Version 1.13.1+cu116
>>>[torch] Avaiable GPUs: 2
>>>[torch] Selected GPU (ID): 0
>>>[ray] Version 2.1.0
>>>[ray] Allowed GPUs (IDs): 0,1
>>>[ray] Ray resources: {'accelerator_type:G': 1.0, 'object_store_memory': 11204663705.0, 'GPU': 1.0, 'CPU': 16.0, 'memory': 43827670221.0, 'node:149.156.105.73': 1.0}
Ray Dashboard URL: http://localhost:8265


## Registering default envs

In [None]:
import gym
from ray.rllib.env.wrappers.dm_control_wrapper import DMCEnv
from ray.tune.registry import register_env

from dm_control.suite import ALL_TASKS, BENCHMARKING, EXTRA

In [3]:
from_pixels = False
height = 64
width = 64
frame_skip = 2
channels_first = True
max_horizon = 1000  # dosen't work

env_list = []
DEFAULT_ENV_REGISTRY = {}
print(f">>> Registering DMC Envs...")
for env, task in BENCHMARKING:
    env_task_name = f"{env}-{task}"
    env_list.append(env_task_name)
    env_creator = lambda config: DMCEnv(
        env,
        task,
        from_pixels=from_pixels,
        height=height,
        width=width,
        frame_skip=frame_skip,
        channels_first=channels_first,
    )
    register_env(env_task_name, env_creator)
    DEFAULT_ENV_REGISTRY[env_task_name] = env_creator

print(env_list)
print(
    f'>>> Number of registered envs in RLlib: {len(env_list)} (from total of {len(BENCHMARKING)} from DMC suite "BENCHMARKING").'
)

>>> Registering DMC Envs...
['acrobot-swingup', 'acrobot-swingup_sparse', 'ball_in_cup-catch', 'cartpole-balance', 'cartpole-balance_sparse', 'cartpole-swingup', 'cartpole-swingup_sparse', 'cheetah-run', 'finger-spin', 'finger-turn_easy', 'finger-turn_hard', 'fish-upright', 'fish-swim', 'hopper-stand', 'hopper-hop', 'humanoid-stand', 'humanoid-walk', 'humanoid-run', 'manipulator-bring_ball', 'pendulum-swingup', 'point_mass-easy', 'reacher-easy', 'reacher-hard', 'swimmer-swimmer6', 'swimmer-swimmer15', 'walker-stand', 'walker-walk', 'walker-run']
>>> Number of registered envs in RLlib: 28 (from total of 28 from DMC suite "BENCHMARKING").


## Registering parametrized envs

In [None]:
import copy
import inspect
from typing import List

from dmc_custom_envs import (
    acrobot,
    ball_in_cup,
    cartpole,
    cheetah,
    finger,
    fish,
    hopper,
    humanoid,
    manipulator,
    pendulum,
    point_mass,
    reacher,
    swimmer,
    walker,
)
from dmc_custom_envs.custom_dmc_env import CustomDMCEnv
from dmc_custom_envs.utils import load as dmc_suite_load

from dm_control.suite import _DOMAINS as DMC_DOMAINS
from dm_control.suite import _get_tasks as suite_get_tasks

In [5]:
# Refresh dm_control.suite internal dict of _DOMAINS
DMC_DOMAINS.update(
    {
        name: module
        for name, module in locals().items()
        if inspect.ismodule(module) and hasattr(module, "SUITE")
    }
)

BENCHMARKING_PARAM = suite_get_tasks(tag="benchmarking_param")
# THE NUMBER OF DISCRETE STEPS FOR EACH PARAMETER
# Idea: non-zero values, 20 steps
# IN PERCENTAGE
PARAM_RANGE = 20
PARAM_MIN = 0.1
PARAM_MAX = 2.0


def get_param_value(step: int) -> float:
    return PARAM_MIN + PARAM_MAX / PARAM_RANGE * step


PARAMETRIZED_ENVS = []
PARAMETRIZED_ENVS_REGISTRY = {}
print(f">>> Registering PARAMETRIZED DMC Envs...")

for env, task in BENCHMARKING_PARAM:
    for idx in range(0, PARAM_RANGE):
        env_task_name = f"{env}-{task}_{idx}"
        PARAMETRIZED_ENVS.append(env_task_name)

        params = {"value1": get_param_value(idx)}
        env_creator = lambda env=env, task=task, DMC_DOMAINS=DMC_DOMAINS, params=params: CustomDMCEnv(
            env,
            task,
            domains=DMC_DOMAINS,
            from_pixels=False,
            height=64,
            width=64,
            frame_skip=2,
            channels_first=True,
            environment_kwargs={"params": params},
        )
        plain_env_creator = (
            lambda env=env, task=task, DMC_DOMAINS=DMC_DOMAINS: dmc_suite_load(
                domain_name=env,
                task_name=task,
                domains=DMC_DOMAINS,
                task_kwargs=None,
                visualize_reward=False,
                environment_kwargs={"params": copy.deepcopy(params)},
            )
        )

        PARAMETRIZED_ENVS_REGISTRY[env_task_name] = env_creator
        register_env(env_task_name, env_creator)
    print(env_task_name)

print(
    f'>>> Number of registered envs from RLlib: {len(PARAMETRIZED_ENVS)} (from total of {len(BENCHMARKING_PARAM) * PARAM_RANGE} from DMC suite "BENCHMARKING_PARAM").'
)

>>> Registering PARAMETRIZED DMC Envs...
acrobot-swingup_param_19
acrobot-swingup_sparse_param_19
ball_in_cup-catch_param_19
cartpole-balance_param_19
cartpole-balance_sparse_param_19
cartpole-swingup_param_19
cartpole-swingup_sparse_param_19
cheetah-run_param_19
finger-spin_param_19
finger-turn_easy_param_19
finger-turn_hard_param_19
fish-upright_param_19
fish-swim_param_19
hopper-stand_param_19
hopper-hop_param_19
humanoid-stand_param_19
humanoid-walk_param_19
humanoid-run_param_19
manipulator-bring_ball_param_19
pendulum-swingup_param_19
point_mass-easy_param_19
reacher-easy_param_19
reacher-hard_param_19
swimmer-swimmer6_param_19
swimmer-swimmer15_param_19
walker-stand_param_19
walker-walk_param_19
walker-run_param_19
>>> Number of registered envs from RLlib: 560 (from total of 560 from DMC suite "BENCHMARKING_PARAM").


In [9]:
def get_parametrized_envs(env_task: str) -> List[str]:
    result = [x for x in PARAMETRIZED_ENVS if not x.find(f"{env_task}_param_")]
    if not result:
        raise KeyError("Task not found!")
    return result

## Loading configs & models

### Checkpoints

In [10]:
import json
import re
from pathlib import Path
from typing import Dict, Tuple

from starlette.requests import Request

In [12]:
# Load training config from jsons's parmameters

results_path = Path("/mnt/ws/eval_workdir_results")
CONFIGS = None


def load_results_configs(path: Path) -> Dict[str, Dict]:
    def get_task_env_name(directory_name: str) -> str:
        pattern = ".+?_(.+?)_[0-9,-]+_.+"
        return re.findall(pattern, directory.name)[0]

    algo_dirs = [
        x.name for x in results_path.iterdir() if x.name in ["SAC", "PPO", "DDPG"]
    ]

    result = {}
    for algo in algo_dirs:
        result[algo] = {} if result.get(algo, None) is None else result[algo]
        working_path = path.joinpath(algo)

        for directory in working_path.iterdir():
            env = get_task_env_name(directory.name)
            result[algo][env] = (
                {} if result[algo].get(env, None) is None else result[algo][env]
            )

            with working_path.joinpath(directory, "params.json").open("r") as json_file:
                result[algo][env] = json.load(json_file)

    return result


CONFIGS = load_results_configs(results_path)

print(f">>> CONFGS FOR TRAINED MODELS:")
print(f">> Number of registered algorithms: {len(CONFIGS)}")
for k, v in CONFIGS.items():
    print(f">> {k} task-envs: {len(v)}")

>>> CONFGS FOR TRAINED MODELS:
>> Number of registered algorithms: 3
>> DDPG task-envs: 28
>> SAC task-envs: 28
>> PPO task-envs: 28


In [13]:
# Append the latests checkpoint path to the model

checkpoints_path = Path("/mnt/ws/eval_workdir_checkpoints")


def get_models_paths(directory: Path) -> Dict[str, Dict[str, Dict[int, Path]]]:
    # returns Algo: Env: Seed: Path to the latestcheckpoint

    def get_env_and_seed(directory_name: str) -> Tuple[str, int]:
        pattern = r"(?P<env>.+?)_(?P<seed>\d+)"
        m = re.match(pattern, directory_name)
        return m.group("env"), int(m.group("seed"))

    def get_checkpoint_path(env_dir: Path) -> str:
        base = env_dir.joinpath("final_checkpoint")
        subdirs = sorted([str(x) for x in base.glob("./*")])
        return subdirs[-1]

    result = {}

    for algo_dir in directory.iterdir():
        algo = algo_dir.name

        if result.get(algo, None) is None:
            result[algo] = {}

        for env_dir in algo_dir.iterdir():
            env, seed = get_env_and_seed(env_dir.name)
            if result[algo].get(env, None) is None:
                result[algo][env] = {"checkpoints_paths": {}}
            result[algo][env]["checkpoints_paths"][seed] = str(
                get_checkpoint_path(env_dir)
            )

    return result


MODELS = get_models_paths(checkpoints_path)
print(f">>> Imported models paths.")


def assert_print(val1, val2, text: str) -> None:
    def bool2ok(x: bool) -> str:
        return " OK " if x else "NOK "

    ok = bool2ok(val1 == val2)
    print(f"[{ok}] {text}: {val1}/{val2}")


print(f">>>>>>>> CHECKPOINTS:")
print(f">>>>>> Number of registered algorithms: {len(MODELS)}")
for k, v in MODELS.items():
    for env_name, config in v.items():
        assert_print(
            len(config["checkpoints_paths"]), 5, f">> {k} task-env {env_name} seeds"
        )

assert Path(
    MODELS["PPO"]["fish-swim"]["checkpoints_paths"][0]
), "Can't read an example PPO fish-swim seed #0 !"

>>> Imported models paths.
>>>>>>>> CHECKPOINTS:
>>>>>> Number of registered algorithms: 3
[ OK ] >> DDPG task-env swimmer-swimmer6 seeds: 5/5
[ OK ] >> DDPG task-env humanoid-run seeds: 5/5
[ OK ] >> DDPG task-env fish-swim seeds: 5/5
[ OK ] >> DDPG task-env acrobot-swingup seeds: 5/5
[ OK ] >> DDPG task-env cartpole-swingup seeds: 5/5
[ OK ] >> DDPG task-env humanoid-walk seeds: 5/5
[ OK ] >> DDPG task-env reacher-easy seeds: 5/5
[ OK ] >> DDPG task-env cartpole-balance_sparse seeds: 5/5
[ OK ] >> DDPG task-env walker-walk seeds: 5/5
[ OK ] >> DDPG task-env manipulator-bring_ball seeds: 5/5
[ OK ] >> DDPG task-env acrobot-swingup_sparse seeds: 5/5
[ OK ] >> DDPG task-env swimmer-swimmer15 seeds: 5/5
[ OK ] >> DDPG task-env finger-turn_easy seeds: 5/5
[ OK ] >> DDPG task-env finger-spin seeds: 5/5
[ OK ] >> DDPG task-env cartpole-swingup_sparse seeds: 5/5
[ OK ] >> DDPG task-env point_mass-easy seeds: 5/5
[ OK ] >> DDPG task-env walker-run seeds: 5/5
[ OK ] >> DDPG task-env cartpole-b

### RLModel

In [None]:
import copy
import csv
from datetime import datetime

import numpy as np
import torch

In [15]:
CONFIG_FROM_TRAINING = {
    "env": None,
    "horizon": 1000,
    "num_workers": 1,
    "framework": "torch",
    "model": {
        "fcnet_hiddens": [64, 64],
        "fcnet_activation": "relu",
    },
    "num_gpus": 1,
    "evaluation_num_workers": 1,
    "evaluation_config": {
        "render_env": False,
    },
    "evaluation_interval": 10,
    "log_level": "ERROR",
}

In [16]:
class EvaluationAlreadyDone(Exception):
    pass


# Hackery for Serve.deployment class
def RLModel_get_file_path(
    results_dir: Path, algo: str, env_name: str, task_name: str
) -> Path:
    return results_dir.joinpath(f"{algo}_{env_name}-{task_name}.csv")


# Hackery for Serve.deployment class
def RLModel_skip_already_done_evaluation(
    file_path: Path, param_step: int, seed: int
) -> None:
    rows = []
    with file_path.open("r") as csv_file:
        csv_reader = csv.DictReader(csv_file)
        for row in csv_reader:
            rows.append(row)
    if not rows:
        return
    last_row = rows[-1]

    last_param_step = int(last_row["step"])
    last_seed = int(last_row["seed"])

    if param_step < last_param_step:
        raise EvaluationAlreadyDone(
            f"Last step in CSV file: {last_param_step}, current step: {param_step}"
        )
    elif param_step == last_param_step:
        if seed <= last_seed:
            raise EvaluationAlreadyDone(
                f"Last seed in CSV file: {last_seed}, current seed: {seed}"
            )


# Slicing resources: 12 CPUs/12 =1 CPU, 8GBs of GPU's VRAM/12: = ~0.1 GPU
@serve.deployment(num_replicas=1, ray_actor_options={"num_cpus": 4.0, "num_gpus": 1.0})
class RLModel:
    def __init__(
        self,
        algo: str,
        seed: int,
        config: Dict,
        checkpoint_path: Path,
        results_output: Path,
    ):
        # Re-create the originally used config.
        self.config = config
        self.env_task_param_name = config["env"]
        self.algo = algo
        self.seed = seed
        self.env_name = self.config["env"].split("-")[0]
        task_param_name = self.config["env"].split("-")[1]
        self.task_name = "_".join(task_param_name.split("_")[:-2])
        self.param_step = int(task_param_name.split("_")[-1])
        self.results_filename = self.get_file_path(
            results_output, self.algo, self.env_name, self.task_name
        )

        self.NUMBER_OF_EVALUATIONS = 30

        self._prepare_csv_file()

        # Another hackery - lets try to fix it by using default env to create Algo
        config["env"] = f"{self.env_name}-{self.task_name}"

        # Build the Algorithm instance using the config.
        if self.algo == "DDPG":
            self.algorithm = ddpg.DDPG(config=config)
        elif self.algo == "PPO":
            self.algorithm = ppo.PPO(config=config)
        elif self.algo == "SAC":
            self.algorithm = sac.SAC(config=config)
        else:
            raise KeyError(f"Algorithm {self.algo} is not supported!")

        self.algorithm.restore(str(checkpoint_path))

        print(
            f"> Algorithm {self.algo} with {self.env_name} env for {self.task_name}_param_{self.param_step} task has been build."
        )

        self.input_shape = self.algorithm.get_policy().observation_space.shape

        print(f"> Checkpoint loaded.")
        print(f"> Preparing CSV file.")

        print(f"> Evaluating...")
        self._evaluation()
        print(f"> Evaluation done.")

    @classmethod
    def get_file_path(
        cls, results_dir: Path, algo: str, env_name: str, task_name: str
    ) -> Path:
        return results_dir.joinpath(f"{algo}_{env_name}-{task_name}.csv")

    def _prepare_csv_file(self) -> None:
        if self.results_filename.exists():
            return

        header = self._prepare_csv_header()
        with self.results_filename.open("w") as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerows(header)

    @classmethod
    def skip_already_done_evaluation(
        cls, file_path: Path, param_step: int, seed: int
    ) -> None:
        rows = []
        with file_path.open("r") as csv_file:
            csv_reader = csv.DictReader(csv_file)
            for row in csv_reader:
                rows.append(row)
        if not rows:
            return
        last_row = rows[-1]

        last_param_step = int(last_row["step"])
        last_seed = int(last_row["seed"])

        if param_step < last_param_step:
            raise EvaluationAlreadyDone(
                f"Last step in CSV file: {last_param_step}, current step: {param_step}"
            )
        elif param_step == last_param_step:
            if seed <= last_seed:
                raise EvaluationAlreadyDone(
                    f"Last seed in CSV file: {last_seed}, current seed: {seed}"
                )

    def _prepare_csv_header(self) -> List[List]:
        HEADER_BASE = [None] * (self.NUMBER_OF_EVALUATIONS + 2)
        HEADER_BASE[0] = "step"
        HEADER_BASE[1] = "seed"
        for x in range(self.NUMBER_OF_EVALUATIONS):
            HEADER_BASE[x + 2] = f"eval_score_no_{x}"
        return [HEADER_BASE]

    def _evaluation(self) -> None:
        env = PARAMETRIZED_ENVS_REGISTRY[self.env_task_param_name]()
        total_rewards = [None] * self.NUMBER_OF_EVALUATIONS

        for cnt in range(self.NUMBER_OF_EVALUATIONS):
            state = env.reset()
            done = False
            total_reward = 0.0
            while not done:
                action = self.algorithm.compute_single_action(state)
                state, reward, done, _ = env.step(action)
                total_reward += float(reward)

            total_rewards[cnt] = total_reward
            print(f"> Try #{cnt+1}: Total reward: {total_reward}")

        self._append_new_datarow(
            param_step=self.param_step, seed=self.seed, total_rewards=total_rewards
        )

    def _append_new_datarow(
        self, param_step: int, seed: int, total_rewards: int
    ) -> None:
        datarow = [None] * (self.NUMBER_OF_EVALUATIONS + 2)
        datarow[0] = param_step
        datarow[1] = seed
        for idx, reward in enumerate(total_rewards):
            datarow[idx + 2] = reward
        with self.results_filename.open("a") as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerows([datarow])

    async def __call__(self, request: Request) -> Dict:
        json_input = await request.json()
        obs = json_input["observation"]

        action = self.algorithm.compute_single_action(obs)
        return {"action": int(action)}


def models_generator(
    algo: str, env_config: Dict, models_paths: Dict, results_output: Path
):
    for seed, model_path in sorted(models_paths.items()):
        yield seed, RLModel.bind(algo, seed, env_config, model_path, results_output)

## Evaluation

In [19]:
algos = ["PPO"]
results_path = Path("/mnt/ws/eval_workdir_results")
reward_results_path = Path("/mnt/ws/eval_workdir_results/output")

for algo in algos:
    for env_task in CONFIGS[algo].keys():
        base_env_name = env_task.split("-")[0]
        try:
            get_parametrized_envs(env_task)
        except KeyError:
            # print(f">>>>>>>>>>[SKIPPING] {algo} | env_task: {env_task}")
            continue
        except ValueError:
            print(f">>>>>>>>>>[CRITICAL ERROR]: COULDNT LOAD {env_task} env_task !!!")
            continue

        env_config = CONFIGS[algo][env_task]
        models_paths = MODELS[algo][env_task]["checkpoints_paths"]

        print(f">>>>>>>>>> {algo} Eval | env_task: {env_task}")
        parametrized_envs = get_parametrized_envs(env_task)

        # Skip already done analysis
        maindir = [x for x in results_path.glob(f"{base_env_name}")]
        if maindir:
            print(
                f">>>>>>>> [SKIPPING] Found main env_task catalog (assuming completion)."
            )
            continue

        for param_env in parametrized_envs:
            config = copy.deepcopy(env_config)
            config["env"] = param_env
            print(f">>> Parametrized env_task: {config['env']}")

            env_name = config["env"].split("-")[0]
            _task_param_name = config["env"].split("-")[1]
            task_name = "_".join(_task_param_name.split("_")[:-2])
            param_step = int(_task_param_name.split("_")[-1])

            for seed, model in models_generator(
                algo, config, models_paths, reward_results_path
            ):
                print(f"> seed: {seed}")

                results_filename = RLModel_get_file_path(
                    reward_results_path, algo, env_name, task_name
                )
                try:
                    if results_filename.exists():
                        RLModel_skip_already_done_evaluation(
                            results_filename, param_step, seed
                        )
                except EvaluationAlreadyDone as e:
                    print(f"> SKIPPING according to the CSV results fire: {e}")
                    continue

                serve.run(model)

>>>>>>>>>> PPO Eval | env_task: cartpole-balance_sparse
>>> Parametrized env_task: cartpole-balance_sparse_param_0
> seed: 0
> SKIPPING according to the CSV results fire: Last step in CSV file: 19, current step: 0
> seed: 5
> SKIPPING according to the CSV results fire: Last step in CSV file: 19, current step: 0
> seed: 10
> SKIPPING according to the CSV results fire: Last step in CSV file: 19, current step: 0
> seed: 15
> SKIPPING according to the CSV results fire: Last step in CSV file: 19, current step: 0
> seed: 20
> SKIPPING according to the CSV results fire: Last step in CSV file: 19, current step: 0
>>> Parametrized env_task: cartpole-balance_sparse_param_1
> seed: 0
> SKIPPING according to the CSV results fire: Last step in CSV file: 19, current step: 1
> seed: 5
> SKIPPING according to the CSV results fire: Last step in CSV file: 19, current step: 1
> seed: 10
> SKIPPING according to the CSV results fire: Last step in CSV file: 19, current step: 1
> seed: 15
> SKIPPING accordin

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:17:57,593 controller 1594951 http_state.py:129 - Starting HTTP proxy with name 'SERVE_CONTROLLER_ACTOR:SERVE_PROXY_ACTOR-5e50c04eb9270146d6339e7c1d45d66d1338ad3e090d9b98769645c4' on node '5e50c04eb9270146d6339e7c1d45d66d1338ad3e090d9b98769645c4' listening on '127.0.0.1:8000'
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:17:58,003 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(HTTPProxyActor pid=1594976)[0m INFO:     Started server process [1594976]
[2m[36m(ServeReplica:RLModel pid=1595006)[0m 2023-05-19 12:17:59,198	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1595006)[0m 2023-05-19 12:17:59,199	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INF

[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Algorithm PPO with humanoid env for stand_param_0 task has been build.
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1595006)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '12.0'.


[2m[36m(ServeReplica:RLModel pid=1595006)[0m 2023-05-19 12:18:03,068	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1595006)[0m 2023-05-19 12:18:03,068	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 428.5977358818054, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1595269)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1595269)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1595269)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1595268)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Try #1: Total reward: 6.987191834943562
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Try #2: Total reward: 6.342499096382232
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Try #3: Total reward: 4.688840591848323
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Try #4: Total reward: 7.249992004931649
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Try #5: Total reward: 9.300364492696218
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Try #6: Total reward: 7.497706423184823
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Try #7: Total reward: 8.074083506090306
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Try #8: Total reward: 2.420714798576011
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Try #9: Total reward: 3.896759666214648
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Try #10: Total reward: 0.9013865712923971
[2m[36m(ServeReplica:RLModel pid=1595006)[0m > Try #11: Total reward: 6.304505071920165
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:18:19,074 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:18:21,217 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1595394)[0m 2023-05-19 12:18:22,407	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1595394)[0m 2023-05-19 12:18:22,408	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1595435)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1595

[2m[36m(ServeReplica:RLModel pid=1595394)[0m > Algorithm PPO with humanoid env for stand_param_0 task has been build.
[2m[36m(ServeReplica:RLModel pid=1595394)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1595394)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1595394)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1595394)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '12.0'.
[2m[36m(ServeReplica:RLModel pid=1595394)[0m > Try #1: Total reward: 0.5378352928153914
[2m[36m(ServeReplica:RLModel pid=1595394)[0m > Try #2: Total reward: 1.6772470428089201
[2m[36m(ServeReplica:RLModel pid=1595394)[0m > Try #3: Total reward: 5.562561583817333
[2m[36m(ServeReplica:RLModel pid=1595394)[0m > Try #4: Total reward: 5.723439496392371
[2m[36m(ServeReplica:RLModel pid=1595394)[0m > Try #5: Total reward: 7.452039872960328
[2m[36m(ServeReplica:RLModel pid=1595394)[0m > Try #6: Total reward: 4.237880272820232
[2m[36m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:18:42,185 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:18:44,327 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1595781)[0m 2023-05-19 12:18:45,504	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1595781)[0m 2023-05-19 12:18:45,505	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1595817)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1595

[2m[36m(ServeReplica:RLModel pid=1595781)[0m > Algorithm PPO with humanoid env for stand_param_0 task has been build.
[2m[36m(ServeReplica:RLModel pid=1595781)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1595781)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1595781)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1595781)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '12.0'.
[2m[36m(ServeReplica:RLModel pid=1595781)[0m > Try #1: Total reward: 0.7174975127226363
[2m[36m(ServeReplica:RLModel pid=1595781)[0m > Try #2: Total reward: 5.328711537771522
[2m[36m(ServeReplica:RLModel pid=1595781)[0m > Try #3: Total reward: 0.22559545954347904
[2m[36m(ServeReplica:RLModel pid=1595781)[0m > Try #4: Total reward: 8.083113360940034
[2m[36m(ServeReplica:RLModel pid=1595781)[0m > Try #5: Total reward: 0.16927678499758847
[2m[36m(ServeReplica:RLModel pid=1595781)[0m > Try #6: Total reward: 5.917728165590523
[2m[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:19:05,291 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:19:07,437 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1596164)[0m 2023-05-19 12:19:08,632	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1596164)[0m 2023-05-19 12:19:08,633	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1596199)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1596

[2m[36m(ServeReplica:RLModel pid=1596164)[0m > Algorithm PPO with humanoid env for stand_param_0 task has been build.
[2m[36m(ServeReplica:RLModel pid=1596164)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1596164)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1596164)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1596164)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '12.0'.
[2m[36m(ServeReplica:RLModel pid=1596164)[0m > Try #1: Total reward: 2.4829978064284997
[2m[36m(ServeReplica:RLModel pid=1596164)[0m > Try #2: Total reward: 6.345706418938685
[2m[36m(ServeReplica:RLModel pid=1596164)[0m > Try #3: Total reward: 2.537434590732132
[2m[36m(ServeReplica:RLModel pid=1596164)[0m > Try #4: Total reward: 9.748534766197665
[2m[36m(ServeReplica:RLModel pid=1596164)[0m > Try #5: Total reward: 4.656390201903913
[2m[36m(ServeReplica:RLModel pid=1596164)[0m > Try #6: Total reward: 4.912797209211674
[2m[36m(

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:19:29,294 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:19:31,435 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1596554)[0m 2023-05-19 12:19:32,622	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1596554)[0m 2023-05-19 12:19:32,623	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1596594)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1596

[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Algorithm PPO with humanoid env for stand_param_0 task has been build.
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1596554)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '12.0'.


[2m[36m(ServeReplica:RLModel pid=1596554)[0m 2023-05-19 12:19:36,370	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1596554)[0m 2023-05-19 12:19:36,370	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 493.1542057991028, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1596817)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1596817)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1596817)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1596816)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Try #1: Total reward: 4.087045853855166
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Try #2: Total reward: 2.4539709626661494
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Try #3: Total reward: 3.700413869357594
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Try #4: Total reward: 3.9931024258182006
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Try #5: Total reward: 2.9440585423923475
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Try #6: Total reward: 1.3520814195430901
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Try #7: Total reward: 3.1638917310855548
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Try #8: Total reward: 10.13675271137021
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Try #9: Total reward: 2.3205241188241628
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Try #10: Total reward: 4.9629759834933
[2m[36m(ServeReplica:RLModel pid=1596554)[0m > Try #11: Total reward: 8.398235495503032
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:19:52,395 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:19:54,536 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1596939)[0m 2023-05-19 12:19:55,728	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1596939)[0m 2023-05-19 12:19:55,728	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1596978)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1596

[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Algorithm PPO with humanoid env for stand_param_1 task has been build.
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1596939)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '24.0'.


[2m[36m(ServeReplica:RLModel pid=1596939)[0m 2023-05-19 12:19:59,498	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1596939)[0m 2023-05-19 12:19:59,498	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 428.5977358818054, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1597203)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1597203)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1597203)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1597202)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Try #1: Total reward: 2.5632740831779413
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Try #2: Total reward: 3.3002329490261437
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Try #3: Total reward: 5.371138467727083
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Try #4: Total reward: 0.30922493522213257
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Try #5: Total reward: 6.572750757910791
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Try #6: Total reward: 11.993603593587467
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Try #7: Total reward: 3.1587826730221096
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Try #8: Total reward: 6.507776513679172
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Try #9: Total reward: 13.00172541400004
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Try #10: Total reward: 1.130514611513632
[2m[36m(ServeReplica:RLModel pid=1596939)[0m > Try #11: Total reward: 5.073259303890187
[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:20:15,504 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:20:17,646 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1597324)[0m 2023-05-19 12:20:18,834	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1597324)[0m 2023-05-19 12:20:18,835	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1597362)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1597

[2m[36m(ServeReplica:RLModel pid=1597324)[0m > Algorithm PPO with humanoid env for stand_param_1 task has been build.
[2m[36m(ServeReplica:RLModel pid=1597324)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1597324)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1597324)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1597324)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '24.0'.
[2m[36m(ServeReplica:RLModel pid=1597324)[0m > Try #1: Total reward: 7.415761363561133
[2m[36m(ServeReplica:RLModel pid=1597324)[0m > Try #2: Total reward: 3.8054196267452824
[2m[36m(ServeReplica:RLModel pid=1597324)[0m > Try #3: Total reward: 1.036590375102049
[2m[36m(ServeReplica:RLModel pid=1597324)[0m > Try #4: Total reward: 2.694802703587099
[2m[36m(ServeReplica:RLModel pid=1597324)[0m > Try #5: Total reward: 0.32676448716086154
[2m[36m(ServeReplica:RLModel pid=1597324)[0m > Try #6: Total reward: 5.35541933507779
[2m[36m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:20:38,511 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:20:40,653 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1597708)[0m 2023-05-19 12:20:41,834	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1597708)[0m 2023-05-19 12:20:41,834	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1597750)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1597

[2m[36m(ServeReplica:RLModel pid=1597708)[0m > Algorithm PPO with humanoid env for stand_param_1 task has been build.
[2m[36m(ServeReplica:RLModel pid=1597708)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1597708)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1597708)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1597708)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '24.0'.
[2m[36m(ServeReplica:RLModel pid=1597708)[0m > Try #1: Total reward: 6.378438907747738
[2m[36m(ServeReplica:RLModel pid=1597708)[0m > Try #2: Total reward: 1.4296820211173922
[2m[36m(ServeReplica:RLModel pid=1597708)[0m > Try #3: Total reward: 1.1410798228266512
[2m[36m(ServeReplica:RLModel pid=1597708)[0m > Try #4: Total reward: 0.32260765880698833
[2m[36m(ServeReplica:RLModel pid=1597708)[0m > Try #5: Total reward: 2.420650869421761
[2m[36m(ServeReplica:RLModel pid=1597708)[0m > Try #6: Total reward: 2.323561259440326
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:21:01,619 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:21:03,760 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1598094)[0m 2023-05-19 12:21:04,951	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1598094)[0m 2023-05-19 12:21:04,951	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1598131)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1598

[2m[36m(ServeReplica:RLModel pid=1598094)[0m > Algorithm PPO with humanoid env for stand_param_1 task has been build.
[2m[36m(ServeReplica:RLModel pid=1598094)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1598094)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1598094)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1598094)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '24.0'.
[2m[36m(ServeReplica:RLModel pid=1598094)[0m > Try #1: Total reward: 8.002555429781793
[2m[36m(ServeReplica:RLModel pid=1598094)[0m > Try #2: Total reward: 8.852632822557673
[2m[36m(ServeReplica:RLModel pid=1598094)[0m > Try #3: Total reward: 3.3527663651387503
[2m[36m(ServeReplica:RLModel pid=1598094)[0m > Try #4: Total reward: 6.060699840029579
[2m[36m(ServeReplica:RLModel pid=1598094)[0m > Try #5: Total reward: 0.4665033252176576
[2m[36m(ServeReplica:RLModel pid=1598094)[0m > Try #6: Total reward: 0.7839101731880131
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:21:24,619 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:21:26,761 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1598479)[0m 2023-05-19 12:21:27,948	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1598479)[0m 2023-05-19 12:21:27,949	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1598518)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1598

[2m[36m(ServeReplica:RLModel pid=1598479)[0m > Algorithm PPO with humanoid env for stand_param_1 task has been build.
[2m[36m(ServeReplica:RLModel pid=1598479)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1598479)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1598479)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1598479)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '24.0'.
[2m[36m(ServeReplica:RLModel pid=1598479)[0m > Try #1: Total reward: 0.5976692041641807
[2m[36m(ServeReplica:RLModel pid=1598479)[0m > Try #2: Total reward: 4.539562599664205
[2m[36m(ServeReplica:RLModel pid=1598479)[0m > Try #3: Total reward: 5.105635273242387
[2m[36m(ServeReplica:RLModel pid=1598479)[0m > Try #4: Total reward: 1.0691940664511765
[2m[36m(ServeReplica:RLModel pid=1598479)[0m > Try #5: Total reward: 0.8549141535147634
[2m[36m(ServeReplica:RLModel pid=1598479)[0m > Try #6: Total reward: 4.450135352942483
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:21:47,724 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:21:49,865 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1598865)[0m 2023-05-19 12:21:51,051	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1598865)[0m 2023-05-19 12:21:51,052	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1598907)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1598

[2m[36m(ServeReplica:RLModel pid=1598865)[0m > Algorithm PPO with humanoid env for stand_param_2 task has been build.
[2m[36m(ServeReplica:RLModel pid=1598865)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1598865)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1598865)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1598865)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '36.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1598865)[0m > Try #1: Total reward: 4.321445958738084
[2m[36m(ServeReplica:RLModel pid=1598865)[0m > Try #2: Total reward: 5.99296956045579
[2m[36m(ServeReplica:RLModel pid=1598865)[0m > Try #3: Total reward: 4.725798606972532
[2m[36m(ServeReplica:RLModel pid=1598865)[0m > Try #4: Total reward: 3.0967460665335738
[2m[36m(ServeReplica:RLModel pid=1598865)[0m > Try #5: Total reward: 0.759674917319886
[2m[36m(ServeReplica:RLModel pid=1598865)[0m > Try #6: Total reward: 6.40138871085005

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:22:10,835 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:22:12,977 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1599251)[0m 2023-05-19 12:22:14,165	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1599251)[0m 2023-05-19 12:22:14,165	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1599289)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1599

[2m[36m(ServeReplica:RLModel pid=1599251)[0m > Algorithm PPO with humanoid env for stand_param_2 task has been build.
[2m[36m(ServeReplica:RLModel pid=1599251)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1599251)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1599251)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1599251)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '36.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1599251)[0m > Try #1: Total reward: 6.838620059163517
[2m[36m(ServeReplica:RLModel pid=1599251)[0m > Try #2: Total reward: 4.464718968631561
[2m[36m(ServeReplica:RLModel pid=1599251)[0m > Try #3: Total reward: 5.786315190955507
[2m[36m(ServeReplica:RLModel pid=1599251)[0m > Try #4: Total reward: 6.5069606192015925
[2m[36m(ServeReplica:RLModel pid=1599251)[0m > Try #5: Total reward: 6.715199400469137
[2m[36m(ServeReplica:RLModel pid=1599251)[0m > Try #6: Total reward: 4.0040916789006

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:22:33,839 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:22:35,984 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1599636)[0m 2023-05-19 12:22:37,169	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1599636)[0m 2023-05-19 12:22:37,170	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1599674)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1599

[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Algorithm PPO with humanoid env for stand_param_2 task has been build.
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1599636)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '36.00000000000001'.


[2m[36m(ServeReplica:RLModel pid=1599636)[0m 2023-05-19 12:22:40,978	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_10/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1599636)[0m 2023-05-19 12:22:40,978	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 432.3389937877655, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1599900)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1599900)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1599900)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1599899)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Try #1: Total reward: 1.8269841740697956
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Try #2: Total reward: 9.727024588718757
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Try #3: Total reward: 5.92864289893826
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Try #4: Total reward: 0.8621917550615481
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Try #5: Total reward: 0.41984322308792854
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Try #6: Total reward: 7.092185099384572
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Try #7: Total reward: 4.037152968446853
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Try #8: Total reward: 4.404781196732036
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Try #9: Total reward: 3.967614464137377
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Try #10: Total reward: 1.3516875596617801
[2m[36m(ServeReplica:RLModel pid=1599636)[0m > Try #11: Total reward: 3.868901779051006
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:22:56,957 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:22:59,098 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1600023)[0m 2023-05-19 12:23:00,295	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1600023)[0m 2023-05-19 12:23:00,295	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1600064)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1600

[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Algorithm PPO with humanoid env for stand_param_2 task has been build.
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1600023)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '36.00000000000001'.


[2m[36m(ServeReplica:RLModel pid=1600023)[0m 2023-05-19 12:23:04,094	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_15/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1600023)[0m 2023-05-19 12:23:04,094	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 468.6909394264221, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1600283)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1600283)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1600283)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1600284)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Try #1: Total reward: 3.0032682300637163
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Try #2: Total reward: 7.734350782152511
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Try #3: Total reward: 1.039194914661923
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Try #4: Total reward: 3.9019803050690287
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Try #5: Total reward: 6.993871742522771
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Try #6: Total reward: 6.439891339050389
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Try #7: Total reward: 3.4029745758434973
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Try #8: Total reward: 5.2056051567334585
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Try #9: Total reward: 10.394322895315852
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Try #10: Total reward: 0.41209376095747097
[2m[36m(ServeReplica:RLModel pid=1600023)[0m > Try #11: Total reward: 6.123455898954465


[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:23:19,962 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:23:22,103 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1600406)[0m 2023-05-19 12:23:23,299	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1600406)[0m 2023-05-19 12:23:23,299	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1600443)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1600

[2m[36m(ServeReplica:RLModel pid=1600406)[0m > Algorithm PPO with humanoid env for stand_param_2 task has been build.
[2m[36m(ServeReplica:RLModel pid=1600406)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1600406)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1600406)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1600406)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '36.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1600406)[0m > Try #1: Total reward: 5.384121839707628
[2m[36m(ServeReplica:RLModel pid=1600406)[0m > Try #2: Total reward: 3.0660935737133115
[2m[36m(ServeReplica:RLModel pid=1600406)[0m > Try #3: Total reward: 8.381004462497678
[2m[36m(ServeReplica:RLModel pid=1600406)[0m > Try #4: Total reward: 2.579848215792397
[2m[36m(ServeReplica:RLModel pid=1600406)[0m > Try #5: Total reward: 6.768103151251771
[2m[36m(ServeReplica:RLModel pid=1600406)[0m > Try #6: Total reward: 3.0628535185164

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:23:43,073 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:23:45,213 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1600792)[0m 2023-05-19 12:23:46,399	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1600792)[0m 2023-05-19 12:23:46,399	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1600833)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1600

[2m[36m(ServeReplica:RLModel pid=1600792)[0m > Algorithm PPO with humanoid env for stand_param_3 task has been build.
[2m[36m(ServeReplica:RLModel pid=1600792)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1600792)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1600792)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1600792)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '48.0'.
[2m[36m(ServeReplica:RLModel pid=1600792)[0m > Try #1: Total reward: 10.030733824826518
[2m[36m(ServeReplica:RLModel pid=1600792)[0m > Try #2: Total reward: 7.562448851531374
[2m[36m(ServeReplica:RLModel pid=1600792)[0m > Try #3: Total reward: 4.053723393423348
[2m[36m(ServeReplica:RLModel pid=1600792)[0m > Try #4: Total reward: 0.8862524553881121
[2m[36m(ServeReplica:RLModel pid=1600792)[0m > Try #5: Total reward: 7.430778751674096
[2m[36m(ServeReplica:RLModel pid=1600792)[0m > Try #6: Total reward: 1.4636659034353845
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:24:06,165 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:24:08,304 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1601176)[0m 2023-05-19 12:24:09,494	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1601176)[0m 2023-05-19 12:24:09,494	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1601215)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1601

[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Algorithm PPO with humanoid env for stand_param_3 task has been build.
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1601176)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '48.0'.


[2m[36m(ServeReplica:RLModel pid=1601176)[0m 2023-05-19 12:24:13,249	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1601176)[0m 2023-05-19 12:24:13,249	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 431.06680631637573, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1601436)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1601436)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1601436)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1601437)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Try #1: Total reward: 3.0505832757974933
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Try #2: Total reward: 2.8319452527150832
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Try #3: Total reward: 0.16771464768489824
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Try #4: Total reward: 3.826559249972131
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Try #5: Total reward: 5.591133502426632
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Try #6: Total reward: 3.360656861764812
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Try #7: Total reward: 7.425009614140072
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Try #8: Total reward: 2.621628007136632
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Try #9: Total reward: 0.7191396580599749
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Try #10: Total reward: 8.884984488439079
[2m[36m(ServeReplica:RLModel pid=1601176)[0m > Try #11: Total reward: 7.724630935422483
[2

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:24:29,255 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:24:31,396 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1601559)[0m 2023-05-19 12:24:32,598	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1601559)[0m 2023-05-19 12:24:32,598	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1601601)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1601

[2m[36m(ServeReplica:RLModel pid=1601559)[0m > Algorithm PPO with humanoid env for stand_param_3 task has been build.
[2m[36m(ServeReplica:RLModel pid=1601559)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1601559)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1601559)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1601559)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '48.0'.
[2m[36m(ServeReplica:RLModel pid=1601559)[0m > Try #1: Total reward: 0.13746832823245894
[2m[36m(ServeReplica:RLModel pid=1601559)[0m > Try #2: Total reward: 5.097308432838926
[2m[36m(ServeReplica:RLModel pid=1601559)[0m > Try #3: Total reward: 9.421546933900876
[2m[36m(ServeReplica:RLModel pid=1601559)[0m > Try #4: Total reward: 1.4002557805075542
[2m[36m(ServeReplica:RLModel pid=1601559)[0m > Try #5: Total reward: 0.7457791315689517
[2m[36m(ServeReplica:RLModel pid=1601559)[0m > Try #6: Total reward: 5.3561408005760756
[2m[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:24:53,280 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:24:55,423 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1601947)[0m 2023-05-19 12:24:56,608	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1601947)[0m 2023-05-19 12:24:56,609	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1601986)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1601

[2m[36m(ServeReplica:RLModel pid=1601947)[0m > Algorithm PPO with humanoid env for stand_param_3 task has been build.
[2m[36m(ServeReplica:RLModel pid=1601947)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1601947)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1601947)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1601947)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '48.0'.
[2m[36m(ServeReplica:RLModel pid=1601947)[0m > Try #1: Total reward: 1.5367132516942383
[2m[36m(ServeReplica:RLModel pid=1601947)[0m > Try #2: Total reward: 3.6175227050143905
[2m[36m(ServeReplica:RLModel pid=1601947)[0m > Try #3: Total reward: 2.274574261353208
[2m[36m(ServeReplica:RLModel pid=1601947)[0m > Try #4: Total reward: 5.070172950322407
[2m[36m(ServeReplica:RLModel pid=1601947)[0m > Try #5: Total reward: 6.304949868430648
[2m[36m(ServeReplica:RLModel pid=1601947)[0m > Try #6: Total reward: 6.717186182764403
[2m[36m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:25:16,386 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:25:18,529 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1602332)[0m 2023-05-19 12:25:19,713	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1602332)[0m 2023-05-19 12:25:19,714	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1602371)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1602

[2m[36m(ServeReplica:RLModel pid=1602332)[0m > Algorithm PPO with humanoid env for stand_param_3 task has been build.
[2m[36m(ServeReplica:RLModel pid=1602332)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1602332)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1602332)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1602332)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '48.0'.
[2m[36m(ServeReplica:RLModel pid=1602332)[0m > Try #1: Total reward: 5.836260037323209
[2m[36m(ServeReplica:RLModel pid=1602332)[0m > Try #2: Total reward: 0.416718313147896
[2m[36m(ServeReplica:RLModel pid=1602332)[0m > Try #3: Total reward: 9.715975863752528
[2m[36m(ServeReplica:RLModel pid=1602332)[0m > Try #4: Total reward: 8.689790782373553
[2m[36m(ServeReplica:RLModel pid=1602332)[0m > Try #5: Total reward: 7.628292109821856
[2m[36m(ServeReplica:RLModel pid=1602332)[0m > Try #6: Total reward: 7.340369628898613
[2m[36m(S

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:25:39,377 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:25:41,518 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1602714)[0m 2023-05-19 12:25:42,705	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1602714)[0m 2023-05-19 12:25:42,706	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1602753)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1602

[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Algorithm PPO with humanoid env for stand_param_4 task has been build.
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1602714)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '60.0'.


[2m[36m(ServeReplica:RLModel pid=1602714)[0m 2023-05-19 12:25:46,501	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1602714)[0m 2023-05-19 12:25:46,501	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 428.5977358818054, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1602976)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1602976)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1602976)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1602975)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Try #1: Total reward: 0.82581245027126
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Try #2: Total reward: 4.1263412746839006
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Try #3: Total reward: 0.19760356303257282
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Try #4: Total reward: 0.19494683718303324
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Try #5: Total reward: 2.3742540020563783
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Try #6: Total reward: 4.828991681528616
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Try #7: Total reward: 5.841880121088746
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Try #8: Total reward: 2.89010362934598
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Try #9: Total reward: 9.786971274988792
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Try #10: Total reward: 4.004154935828271
[2m[36m(ServeReplica:RLModel pid=1602714)[0m > Try #11: Total reward: 7.477000047652402
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:26:02,478 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:26:04,621 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1603099)[0m 2023-05-19 12:26:05,816	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1603099)[0m 2023-05-19 12:26:05,816	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1603139)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1603

[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Algorithm PPO with humanoid env for stand_param_4 task has been build.
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1603099)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '60.0'.


[2m[36m(ServeReplica:RLModel pid=1603099)[0m 2023-05-19 12:26:09,589	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1603099)[0m 2023-05-19 12:26:09,589	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 431.06680631637573, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1603361)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1603361)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1603361)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1603360)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Try #1: Total reward: 2.1033344358107793
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Try #2: Total reward: 7.852713990944069
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Try #3: Total reward: 1.9572700841363293
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Try #4: Total reward: 6.792998304756228
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Try #5: Total reward: 6.828146147704493
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Try #6: Total reward: 6.241878156946474
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Try #7: Total reward: 2.7149086703067216
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Try #8: Total reward: 6.876818935598333
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Try #9: Total reward: 4.27566303258998
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Try #10: Total reward: 2.285166002618893
[2m[36m(ServeReplica:RLModel pid=1603099)[0m > Try #11: Total reward: 3.266448701169809
[2m[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:26:25,579 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:26:27,721 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1603483)[0m 2023-05-19 12:26:28,908	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1603483)[0m 2023-05-19 12:26:28,909	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1603524)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1603

[2m[36m(ServeReplica:RLModel pid=1603483)[0m > Algorithm PPO with humanoid env for stand_param_4 task has been build.
[2m[36m(ServeReplica:RLModel pid=1603483)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1603483)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1603483)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1603483)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '60.0'.
[2m[36m(ServeReplica:RLModel pid=1603483)[0m > Try #1: Total reward: 0.8398869078584756
[2m[36m(ServeReplica:RLModel pid=1603483)[0m > Try #2: Total reward: 10.51943399619588
[2m[36m(ServeReplica:RLModel pid=1603483)[0m > Try #3: Total reward: 5.87044396550519
[2m[36m(ServeReplica:RLModel pid=1603483)[0m > Try #4: Total reward: 9.450447178771807
[2m[36m(ServeReplica:RLModel pid=1603483)[0m > Try #5: Total reward: 7.646429543446719
[2m[36m(ServeReplica:RLModel pid=1603483)[0m > Try #6: Total reward: 7.072722768927985
[2m[36m(S

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:26:48,584 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:26:50,727 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1603868)[0m 2023-05-19 12:26:51,916	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1603868)[0m 2023-05-19 12:26:51,916	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1603904)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1603

[2m[36m(ServeReplica:RLModel pid=1603868)[0m > Algorithm PPO with humanoid env for stand_param_4 task has been build.
[2m[36m(ServeReplica:RLModel pid=1603868)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1603868)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1603868)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1603868)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '60.0'.
[2m[36m(ServeReplica:RLModel pid=1603868)[0m > Try #1: Total reward: 3.513908712999649
[2m[36m(ServeReplica:RLModel pid=1603868)[0m > Try #2: Total reward: 4.737245689230022
[2m[36m(ServeReplica:RLModel pid=1603868)[0m > Try #3: Total reward: 4.4515268630529485
[2m[36m(ServeReplica:RLModel pid=1603868)[0m > Try #4: Total reward: 7.635670481978708
[2m[36m(ServeReplica:RLModel pid=1603868)[0m > Try #5: Total reward: 4.005258013916291
[2m[36m(ServeReplica:RLModel pid=1603868)[0m > Try #6: Total reward: 6.704524489394718
[2m[36m(

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:27:11,674 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:27:13,817 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1604255)[0m 2023-05-19 12:27:15,014	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1604255)[0m 2023-05-19 12:27:15,015	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1604295)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1604

[2m[36m(ServeReplica:RLModel pid=1604255)[0m > Algorithm PPO with humanoid env for stand_param_4 task has been build.
[2m[36m(ServeReplica:RLModel pid=1604255)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1604255)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1604255)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1604255)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '60.0'.
[2m[36m(ServeReplica:RLModel pid=1604255)[0m > Try #1: Total reward: 5.4852758661455185
[2m[36m(ServeReplica:RLModel pid=1604255)[0m > Try #2: Total reward: 0.4460370078577646
[2m[36m(ServeReplica:RLModel pid=1604255)[0m > Try #3: Total reward: 10.165512500265878
[2m[36m(ServeReplica:RLModel pid=1604255)[0m > Try #4: Total reward: 3.5040634970178806
[2m[36m(ServeReplica:RLModel pid=1604255)[0m > Try #5: Total reward: 4.021173223556228
[2m[36m(ServeReplica:RLModel pid=1604255)[0m > Try #6: Total reward: 4.14875585126095
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:27:34,785 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:27:36,926 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1604639)[0m 2023-05-19 12:27:38,138	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1604639)[0m 2023-05-19 12:27:38,138	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1604680)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1604

[2m[36m(ServeReplica:RLModel pid=1604639)[0m > Algorithm PPO with humanoid env for stand_param_5 task has been build.
[2m[36m(ServeReplica:RLModel pid=1604639)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1604639)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1604639)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1604639)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '72.0'.
[2m[36m(ServeReplica:RLModel pid=1604639)[0m > Try #1: Total reward: 10.169157082439915
[2m[36m(ServeReplica:RLModel pid=1604639)[0m > Try #2: Total reward: 10.113405553599295
[2m[36m(ServeReplica:RLModel pid=1604639)[0m > Try #3: Total reward: 0.9622774014250702
[2m[36m(ServeReplica:RLModel pid=1604639)[0m > Try #4: Total reward: 3.643425061623665
[2m[36m(ServeReplica:RLModel pid=1604639)[0m > Try #5: Total reward: 3.8886756979399393
[2m[36m(ServeReplica:RLModel pid=1604639)[0m > Try #6: Total reward: 5.551335114312245
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:27:58,771 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:28:00,910 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1605032)[0m 2023-05-19 12:28:02,109	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1605032)[0m 2023-05-19 12:28:02,110	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1605073)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1605

[2m[36m(ServeReplica:RLModel pid=1605032)[0m > Algorithm PPO with humanoid env for stand_param_5 task has been build.
[2m[36m(ServeReplica:RLModel pid=1605032)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1605032)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1605032)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1605032)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '72.0'.
[2m[36m(ServeReplica:RLModel pid=1605032)[0m > Try #1: Total reward: 2.5786940262857643
[2m[36m(ServeReplica:RLModel pid=1605032)[0m > Try #2: Total reward: 2.0215332775711357
[2m[36m(ServeReplica:RLModel pid=1605032)[0m > Try #3: Total reward: 8.386440849448046
[2m[36m(ServeReplica:RLModel pid=1605032)[0m > Try #4: Total reward: 5.876764935705281
[2m[36m(ServeReplica:RLModel pid=1605032)[0m > Try #5: Total reward: 2.133118746305605
[2m[36m(ServeReplica:RLModel pid=1605032)[0m > Try #6: Total reward: 2.858510552377144
[2m[36m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:28:21,858 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:28:24,003 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1605419)[0m 2023-05-19 12:28:25,189	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1605419)[0m 2023-05-19 12:28:25,190	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1605457)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1605

[2m[36m(ServeReplica:RLModel pid=1605419)[0m > Algorithm PPO with humanoid env for stand_param_5 task has been build.
[2m[36m(ServeReplica:RLModel pid=1605419)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1605419)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1605419)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1605419)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '72.0'.
[2m[36m(ServeReplica:RLModel pid=1605419)[0m > Try #1: Total reward: 6.389672013098853
[2m[36m(ServeReplica:RLModel pid=1605419)[0m > Try #2: Total reward: 3.910992945630507
[2m[36m(ServeReplica:RLModel pid=1605419)[0m > Try #3: Total reward: 1.161810120908447
[2m[36m(ServeReplica:RLModel pid=1605419)[0m > Try #4: Total reward: 2.286578439670205
[2m[36m(ServeReplica:RLModel pid=1605419)[0m > Try #5: Total reward: 1.0527323141845484
[2m[36m(ServeReplica:RLModel pid=1605419)[0m > Try #6: Total reward: 5.655875935747137
[2m[36m(

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:28:44,958 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:28:47,105 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1605803)[0m 2023-05-19 12:28:48,296	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1605803)[0m 2023-05-19 12:28:48,297	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1605838)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1605

[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Algorithm PPO with humanoid env for stand_param_5 task has been build.
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1605803)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '72.0'.


[2m[36m(ServeReplica:RLModel pid=1605803)[0m 2023-05-19 12:28:52,090	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_15/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1605803)[0m 2023-05-19 12:28:52,090	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 468.6909394264221, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1606068)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1606068)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1606068)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1606067)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Try #1: Total reward: 5.213056310690234
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Try #2: Total reward: 10.5150248523519
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Try #3: Total reward: 0.2913256611147973
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Try #4: Total reward: 10.939028924931629
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Try #5: Total reward: 6.8495602594768545
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Try #6: Total reward: 2.0313354438100024
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Try #7: Total reward: 4.317367220710297
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Try #8: Total reward: 0.6262301507075085
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Try #9: Total reward: 6.323563392223005
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Try #10: Total reward: 6.425148688781283
[2m[36m(ServeReplica:RLModel pid=1605803)[0m > Try #11: Total reward: 1.9259510508206172
[2

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:29:08,059 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:29:10,200 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1606190)[0m 2023-05-19 12:29:11,398	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1606190)[0m 2023-05-19 12:29:11,399	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1606228)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1606

[2m[36m(ServeReplica:RLModel pid=1606190)[0m > Algorithm PPO with humanoid env for stand_param_5 task has been build.
[2m[36m(ServeReplica:RLModel pid=1606190)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1606190)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1606190)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1606190)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '72.0'.
[2m[36m(ServeReplica:RLModel pid=1606190)[0m > Try #1: Total reward: 6.513375874746386
[2m[36m(ServeReplica:RLModel pid=1606190)[0m > Try #2: Total reward: 2.6618238813993833
[2m[36m(ServeReplica:RLModel pid=1606190)[0m > Try #3: Total reward: 9.537432183400446
[2m[36m(ServeReplica:RLModel pid=1606190)[0m > Try #4: Total reward: 1.6935995236247692
[2m[36m(ServeReplica:RLModel pid=1606190)[0m > Try #5: Total reward: 2.7902358683365693
[2m[36m(ServeReplica:RLModel pid=1606190)[0m > Try #6: Total reward: 5.680558622675313
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:29:31,052 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:29:33,194 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1606573)[0m 2023-05-19 12:29:34,394	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1606573)[0m 2023-05-19 12:29:34,395	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1606614)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1606

[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Algorithm PPO with humanoid env for stand_param_6 task has been build.
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1606573)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '84.00000000000001'.


[2m[36m(ServeReplica:RLModel pid=1606573)[0m 2023-05-19 12:29:38,200	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1606573)[0m 2023-05-19 12:29:38,201	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 428.5977358818054, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1606837)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1606837)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1606837)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1606838)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Try #1: Total reward: 7.094561208215976
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Try #2: Total reward: 5.508658146328781
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Try #3: Total reward: 2.1867944067042226
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Try #4: Total reward: 2.7272080395638265
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Try #5: Total reward: 5.2383713423217
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Try #6: Total reward: 3.587026888007053
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Try #7: Total reward: 9.527225044534822
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Try #8: Total reward: 4.809892446307581
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Try #9: Total reward: 6.766122366287976
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Try #10: Total reward: 3.3239026344458473
[2m[36m(ServeReplica:RLModel pid=1606573)[0m > Try #11: Total reward: 2.3801700285860825
[2m[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:29:54,126 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:29:56,267 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1606959)[0m 2023-05-19 12:29:57,462	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1606959)[0m 2023-05-19 12:29:57,462	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1606999)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1606

[2m[36m(ServeReplica:RLModel pid=1606959)[0m > Algorithm PPO with humanoid env for stand_param_6 task has been build.
[2m[36m(ServeReplica:RLModel pid=1606959)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1606959)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1606959)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1606959)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '84.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1606959)[0m > Try #1: Total reward: 3.848900860823318
[2m[36m(ServeReplica:RLModel pid=1606959)[0m > Try #2: Total reward: 3.650766357078371
[2m[36m(ServeReplica:RLModel pid=1606959)[0m > Try #3: Total reward: 0.6204538957410607
[2m[36m(ServeReplica:RLModel pid=1606959)[0m > Try #4: Total reward: 6.663887887914815
[2m[36m(ServeReplica:RLModel pid=1606959)[0m > Try #5: Total reward: 0.6322395109538815
[2m[36m(ServeReplica:RLModel pid=1606959)[0m > Try #6: Total reward: 3.792560767716

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:30:17,217 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:30:19,361 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1607346)[0m 2023-05-19 12:30:20,550	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1607346)[0m 2023-05-19 12:30:20,551	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1607388)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1607

[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Algorithm PPO with humanoid env for stand_param_6 task has been build.
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1607346)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '84.00000000000001'.


[2m[36m(ServeReplica:RLModel pid=1607346)[0m 2023-05-19 12:30:24,311	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_10/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1607346)[0m 2023-05-19 12:30:24,311	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 432.3389937877655, '_episodes_total': 1000}
[2m[36m(ServeReplica:RLModel pid=1607346)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1607346)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1607346)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1607608)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Try #1: Total reward: 5.271493930330155
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Try #2: Total reward: 3.9961112360809694
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Try #3: Total reward: 5.556543563410541
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Try #4: Total reward: 6.168888243358134
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Try #5: Total reward: 7.750003879715144
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Try #6: Total reward: 4.848875215914817
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Try #7: Total reward: 3.164249481538864
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Try #8: Total reward: 2.9636977453119675
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Try #9: Total reward: 6.037861988740009
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Try #10: Total reward: 11.609708125596296
[2m[36m(ServeReplica:RLModel pid=1607346)[0m > Try #11: Total reward: 3.3148236845609667
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:30:40,305 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:30:42,447 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1607730)[0m 2023-05-19 12:30:43,636	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1607730)[0m 2023-05-19 12:30:43,636	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1607769)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1607

[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Algorithm PPO with humanoid env for stand_param_6 task has been build.
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1607730)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '84.00000000000001'.


[2m[36m(ServeReplica:RLModel pid=1607730)[0m 2023-05-19 12:30:47,427	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_15/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1607730)[0m 2023-05-19 12:30:47,427	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 468.6909394264221, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1607992)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1607992)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1607992)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1607993)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Try #1: Total reward: 6.307039410264457
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Try #2: Total reward: 2.0315315110212144
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Try #3: Total reward: 5.52908158832364
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Try #4: Total reward: 4.974600015302249
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Try #5: Total reward: 6.065135579588235
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Try #6: Total reward: 1.592743619514311
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Try #7: Total reward: 3.422530709341456
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Try #8: Total reward: 6.579353813265303
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Try #9: Total reward: 5.168342960027237
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Try #10: Total reward: 4.373910621773838
[2m[36m(ServeReplica:RLModel pid=1607730)[0m > Try #11: Total reward: 3.0805953020255323
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:31:03,392 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:31:05,535 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1608116)[0m 2023-05-19 12:31:06,725	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1608116)[0m 2023-05-19 12:31:06,726	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1608154)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1608

[2m[36m(ServeReplica:RLModel pid=1608116)[0m > Algorithm PPO with humanoid env for stand_param_6 task has been build.
[2m[36m(ServeReplica:RLModel pid=1608116)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1608116)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1608116)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1608116)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '84.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1608116)[0m > Try #1: Total reward: 2.105762151161262
[2m[36m(ServeReplica:RLModel pid=1608116)[0m > Try #2: Total reward: 3.1979344616436363
[2m[36m(ServeReplica:RLModel pid=1608116)[0m > Try #3: Total reward: 2.8959927633212845
[2m[36m(ServeReplica:RLModel pid=1608116)[0m > Try #4: Total reward: 4.238698221143877
[2m[36m(ServeReplica:RLModel pid=1608116)[0m > Try #5: Total reward: 3.739040701352805
[2m[36m(ServeReplica:RLModel pid=1608116)[0m > Try #6: Total reward: 4.089921145539

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:31:26,374 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:31:28,516 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1608501)[0m 2023-05-19 12:31:29,702	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1608501)[0m 2023-05-19 12:31:29,702	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1608539)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1608

[2m[36m(ServeReplica:RLModel pid=1608501)[0m > Algorithm PPO with humanoid env for stand_param_7 task has been build.
[2m[36m(ServeReplica:RLModel pid=1608501)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1608501)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1608501)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1608501)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '96.0'.
[2m[36m(ServeReplica:RLModel pid=1608501)[0m > Try #1: Total reward: 1.4234878692536452
[2m[36m(ServeReplica:RLModel pid=1608501)[0m > Try #2: Total reward: 0.35975637981503594
[2m[36m(ServeReplica:RLModel pid=1608501)[0m > Try #3: Total reward: 4.744953546686745
[2m[36m(ServeReplica:RLModel pid=1608501)[0m > Try #4: Total reward: 11.368671799278019
[2m[36m(ServeReplica:RLModel pid=1608501)[0m > Try #5: Total reward: 3.322134314679064
[2m[36m(ServeReplica:RLModel pid=1608501)[0m > Try #6: Total reward: 3.865403747516626
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:31:49,457 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:31:51,598 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1608885)[0m 2023-05-19 12:31:52,793	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1608885)[0m 2023-05-19 12:31:52,793	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1608927)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1608

[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Algorithm PPO with humanoid env for stand_param_7 task has been build.
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1608885)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '96.0'.


[2m[36m(ServeReplica:RLModel pid=1608885)[0m 2023-05-19 12:31:56,529	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1608885)[0m 2023-05-19 12:31:56,529	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 431.06680631637573, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1609147)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1609147)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1609147)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1609146)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Try #1: Total reward: 5.5818769949027
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Try #2: Total reward: 3.694582737397696
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Try #3: Total reward: 0.5924893010171479
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Try #4: Total reward: 6.485231706354027
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Try #5: Total reward: 2.9487518996700293
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Try #6: Total reward: 10.39894002822425
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Try #7: Total reward: 6.271580748216123
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Try #8: Total reward: 3.57488428953834
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Try #9: Total reward: 8.972794972455874
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Try #10: Total reward: 10.417927390673627
[2m[36m(ServeReplica:RLModel pid=1608885)[0m > Try #11: Total reward: 4.685301983723233
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:32:12,538 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:32:14,675 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1609269)[0m 2023-05-19 12:32:15,895	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1609269)[0m 2023-05-19 12:32:15,896	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1609311)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1609

[2m[36m(ServeReplica:RLModel pid=1609269)[0m > Algorithm PPO with humanoid env for stand_param_7 task has been build.
[2m[36m(ServeReplica:RLModel pid=1609269)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1609269)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1609269)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1609269)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '96.0'.
[2m[36m(ServeReplica:RLModel pid=1609269)[0m > Try #1: Total reward: 3.713610669235348
[2m[36m(ServeReplica:RLModel pid=1609269)[0m > Try #2: Total reward: 10.570076072270789
[2m[36m(ServeReplica:RLModel pid=1609269)[0m > Try #3: Total reward: 8.444125923015733
[2m[36m(ServeReplica:RLModel pid=1609269)[0m > Try #4: Total reward: 8.67658841830701
[2m[36m(ServeReplica:RLModel pid=1609269)[0m > Try #5: Total reward: 0.19806436766134242
[2m[36m(ServeReplica:RLModel pid=1609269)[0m > Try #6: Total reward: 7.274893560641565
[2m[36m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:32:35,629 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:32:37,772 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1609652)[0m 2023-05-19 12:32:38,960	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1609652)[0m 2023-05-19 12:32:38,960	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1609688)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1609

[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Algorithm PPO with humanoid env for stand_param_7 task has been build.
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1609652)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '96.0'.


[2m[36m(ServeReplica:RLModel pid=1609652)[0m 2023-05-19 12:32:42,854	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_15/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1609652)[0m 2023-05-19 12:32:42,854	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 468.6909394264221, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1609914)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1609914)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1609914)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1609915)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Try #1: Total reward: 11.95674767619807
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Try #2: Total reward: 4.331551214210221
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Try #3: Total reward: 2.1755470789157707
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Try #4: Total reward: 1.1024931656232815
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Try #5: Total reward: 2.354588599475161
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Try #6: Total reward: 4.300766794343879
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Try #7: Total reward: 8.452253448626125
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Try #8: Total reward: 5.83338709990108
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Try #9: Total reward: 11.260987264729497
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Try #10: Total reward: 9.864972913010408
[2m[36m(ServeReplica:RLModel pid=1609652)[0m > Try #11: Total reward: 10.214168362688227
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:32:58,717 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:33:00,857 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1610041)[0m 2023-05-19 12:33:02,066	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1610041)[0m 2023-05-19 12:33:02,067	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1610076)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1610

[2m[36m(ServeReplica:RLModel pid=1610041)[0m > Algorithm PPO with humanoid env for stand_param_7 task has been build.
[2m[36m(ServeReplica:RLModel pid=1610041)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1610041)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1610041)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1610041)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '96.0'.
[2m[36m(ServeReplica:RLModel pid=1610041)[0m > Try #1: Total reward: 10.259024944885653
[2m[36m(ServeReplica:RLModel pid=1610041)[0m > Try #2: Total reward: 0.1992710365055318
[2m[36m(ServeReplica:RLModel pid=1610041)[0m > Try #3: Total reward: 5.057329385184476
[2m[36m(ServeReplica:RLModel pid=1610041)[0m > Try #4: Total reward: 2.224233437116596
[2m[36m(ServeReplica:RLModel pid=1610041)[0m > Try #5: Total reward: 1.860656578313586
[2m[36m(ServeReplica:RLModel pid=1610041)[0m > Try #6: Total reward: 1.2899204069098045
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:33:22,712 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:33:24,852 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1610427)[0m 2023-05-19 12:33:26,057	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1610427)[0m 2023-05-19 12:33:26,057	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1610469)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1610

[2m[36m(ServeReplica:RLModel pid=1610427)[0m > Algorithm PPO with humanoid env for stand_param_8 task has been build.
[2m[36m(ServeReplica:RLModel pid=1610427)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1610427)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1610427)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1610427)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '108.0'.
[2m[36m(ServeReplica:RLModel pid=1610427)[0m > Try #1: Total reward: 10.167192471743391
[2m[36m(ServeReplica:RLModel pid=1610427)[0m > Try #2: Total reward: 3.0469543139019146
[2m[36m(ServeReplica:RLModel pid=1610427)[0m > Try #3: Total reward: 2.1099768946318544
[2m[36m(ServeReplica:RLModel pid=1610427)[0m > Try #4: Total reward: 3.4331677853351374
[2m[36m(ServeReplica:RLModel pid=1610427)[0m > Try #5: Total reward: 7.737243580637012
[2m[36m(ServeReplica:RLModel pid=1610427)[0m > Try #6: Total reward: 5.74327723014653
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:33:45,813 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:33:47,951 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1610812)[0m 2023-05-19 12:33:49,150	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1610812)[0m 2023-05-19 12:33:49,151	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1610849)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1610

[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Algorithm PPO with humanoid env for stand_param_8 task has been build.
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1610812)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '108.0'.


[2m[36m(ServeReplica:RLModel pid=1610812)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1610812)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1610812)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Try #1: Total reward: 0.3452996485073135
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Try #2: Total reward: 0.5833841096537982
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Try #3: Total reward: 6.612142425070097
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Try #4: Total reward: 6.540027593143714
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Try #5: Total reward: 9.548093496839867
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Try #6: Total reward: 5.359545753946553
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Try #7: Total reward: 4.656810953862011
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Try #8: Total reward: 2.6548502112288017
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Try #9: Total reward: 10.14626981577059
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Try #10: Total reward: 8.714719544769348
[2m[36m(ServeReplica:RLModel pid=1610812)[0m > Try #11: Total reward: 5.834300250811703
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:34:08,896 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:34:11,037 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1611215)[0m 2023-05-19 12:34:12,227	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1611215)[0m 2023-05-19 12:34:12,227	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1611257)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1611

[2m[36m(ServeReplica:RLModel pid=1611215)[0m > Algorithm PPO with humanoid env for stand_param_8 task has been build.
[2m[36m(ServeReplica:RLModel pid=1611215)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1611215)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1611215)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1611215)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '108.0'.
[2m[36m(ServeReplica:RLModel pid=1611215)[0m > Try #1: Total reward: 8.186663430866673
[2m[36m(ServeReplica:RLModel pid=1611215)[0m > Try #2: Total reward: 6.497844411839199
[2m[36m(ServeReplica:RLModel pid=1611215)[0m > Try #3: Total reward: 5.769412116375417
[2m[36m(ServeReplica:RLModel pid=1611215)[0m > Try #4: Total reward: 6.008936615151784
[2m[36m(ServeReplica:RLModel pid=1611215)[0m > Try #5: Total reward: 5.411529368429132
[2m[36m(ServeReplica:RLModel pid=1611215)[0m > Try #6: Total reward: 5.56620362238398
[2m[36m(S

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:34:31,973 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:34:34,114 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1611601)[0m 2023-05-19 12:34:35,314	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1611601)[0m 2023-05-19 12:34:35,315	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1611634)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1611

[2m[36m(ServeReplica:RLModel pid=1611601)[0m > Algorithm PPO with humanoid env for stand_param_8 task has been build.
[2m[36m(ServeReplica:RLModel pid=1611601)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1611601)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1611601)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1611601)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '108.0'.
[2m[36m(ServeReplica:RLModel pid=1611601)[0m > Try #1: Total reward: 3.5044863124286914
[2m[36m(ServeReplica:RLModel pid=1611601)[0m > Try #2: Total reward: 4.542864295237984
[2m[36m(ServeReplica:RLModel pid=1611601)[0m > Try #3: Total reward: 6.075523587395984
[2m[36m(ServeReplica:RLModel pid=1611601)[0m > Try #4: Total reward: 3.1435843406845843
[2m[36m(ServeReplica:RLModel pid=1611601)[0m > Try #5: Total reward: 1.8683480924340712
[2m[36m(ServeReplica:RLModel pid=1611601)[0m > Try #6: Total reward: 7.74321188057899
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:34:55,059 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:34:57,200 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1611983)[0m 2023-05-19 12:34:58,396	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1611983)[0m 2023-05-19 12:34:58,396	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1612024)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1612

[2m[36m(ServeReplica:RLModel pid=1611983)[0m > Algorithm PPO with humanoid env for stand_param_8 task has been build.
[2m[36m(ServeReplica:RLModel pid=1611983)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1611983)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1611983)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1611983)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '108.0'.
[2m[36m(ServeReplica:RLModel pid=1611983)[0m > Try #1: Total reward: 2.2597758125756164
[2m[36m(ServeReplica:RLModel pid=1611983)[0m > Try #2: Total reward: 5.830924441741041
[2m[36m(ServeReplica:RLModel pid=1611983)[0m > Try #3: Total reward: 5.923443624327638
[2m[36m(ServeReplica:RLModel pid=1611983)[0m > Try #4: Total reward: 3.5117910554268157
[2m[36m(ServeReplica:RLModel pid=1611983)[0m > Try #5: Total reward: 11.11791918999765
[2m[36m(ServeReplica:RLModel pid=1611983)[0m > Try #6: Total reward: 8.608213306201723
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:35:18,045 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:35:20,188 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1612367)[0m 2023-05-19 12:35:21,402	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1612367)[0m 2023-05-19 12:35:21,402	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1612404)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1612

[2m[36m(ServeReplica:RLModel pid=1612367)[0m > Algorithm PPO with humanoid env for stand_param_9 task has been build.
[2m[36m(ServeReplica:RLModel pid=1612367)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1612367)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1612367)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1612367)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '120.0'.
[2m[36m(ServeReplica:RLModel pid=1612367)[0m > Try #1: Total reward: 6.358217448389911
[2m[36m(ServeReplica:RLModel pid=1612367)[0m > Try #2: Total reward: 6.243563998127713
[2m[36m(ServeReplica:RLModel pid=1612367)[0m > Try #3: Total reward: 5.459315076377821
[2m[36m(ServeReplica:RLModel pid=1612367)[0m > Try #4: Total reward: 0.5709600384494155
[2m[36m(ServeReplica:RLModel pid=1612367)[0m > Try #5: Total reward: 6.380962558005698
[2m[36m(ServeReplica:RLModel pid=1612367)[0m > Try #6: Total reward: 1.2496254145858061
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:35:41,146 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:35:43,288 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1612751)[0m 2023-05-19 12:35:44,477	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1612751)[0m 2023-05-19 12:35:44,478	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1612790)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1612

[2m[36m(ServeReplica:RLModel pid=1612751)[0m > Algorithm PPO with humanoid env for stand_param_9 task has been build.
[2m[36m(ServeReplica:RLModel pid=1612751)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1612751)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1612751)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1612751)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '120.0'.
[2m[36m(ServeReplica:RLModel pid=1612751)[0m > Try #1: Total reward: 4.86316231773847
[2m[36m(ServeReplica:RLModel pid=1612751)[0m > Try #2: Total reward: 0.7985882387964451
[2m[36m(ServeReplica:RLModel pid=1612751)[0m > Try #3: Total reward: 0.37511555823841886
[2m[36m(ServeReplica:RLModel pid=1612751)[0m > Try #4: Total reward: 6.999322025990292
[2m[36m(ServeReplica:RLModel pid=1612751)[0m > Try #5: Total reward: 2.3970273976123124
[2m[36m(ServeReplica:RLModel pid=1612751)[0m > Try #6: Total reward: 7.123576498980925
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:36:04,245 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:36:06,385 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1613136)[0m 2023-05-19 12:36:07,582	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1613136)[0m 2023-05-19 12:36:07,583	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1613177)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1613

[2m[36m(ServeReplica:RLModel pid=1613136)[0m > Algorithm PPO with humanoid env for stand_param_9 task has been build.
[2m[36m(ServeReplica:RLModel pid=1613136)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1613136)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1613136)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1613136)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '120.0'.
[2m[36m(ServeReplica:RLModel pid=1613136)[0m > Try #1: Total reward: 4.670733780441429
[2m[36m(ServeReplica:RLModel pid=1613136)[0m > Try #2: Total reward: 5.132718286950764
[2m[36m(ServeReplica:RLModel pid=1613136)[0m > Try #3: Total reward: 6.589166844648816
[2m[36m(ServeReplica:RLModel pid=1613136)[0m > Try #4: Total reward: 1.823083128432138
[2m[36m(ServeReplica:RLModel pid=1613136)[0m > Try #5: Total reward: 6.660143043070472
[2m[36m(ServeReplica:RLModel pid=1613136)[0m > Try #6: Total reward: 6.950030364625185
[2m[36m(

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:36:27,330 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:36:29,473 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1613522)[0m 2023-05-19 12:36:30,658	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1613522)[0m 2023-05-19 12:36:30,659	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1613560)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1613

[2m[36m(ServeReplica:RLModel pid=1613522)[0m > Algorithm PPO with humanoid env for stand_param_9 task has been build.
[2m[36m(ServeReplica:RLModel pid=1613522)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1613522)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1613522)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1613522)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '120.0'.
[2m[36m(ServeReplica:RLModel pid=1613522)[0m > Try #1: Total reward: 9.187315095999335
[2m[36m(ServeReplica:RLModel pid=1613522)[0m > Try #2: Total reward: 6.813966726135829
[2m[36m(ServeReplica:RLModel pid=1613522)[0m > Try #3: Total reward: 0.016550302080104683
[2m[36m(ServeReplica:RLModel pid=1613522)[0m > Try #4: Total reward: 8.250194030126735
[2m[36m(ServeReplica:RLModel pid=1613522)[0m > Try #5: Total reward: 6.57851465084842
[2m[36m(ServeReplica:RLModel pid=1613522)[0m > Try #6: Total reward: 2.2335064619953773
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:36:50,322 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:36:52,464 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1613907)[0m 2023-05-19 12:36:53,662	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1613907)[0m 2023-05-19 12:36:53,663	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1613941)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1613

[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Algorithm PPO with humanoid env for stand_param_9 task has been build.
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1613907)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '120.0'.


[2m[36m(ServeReplica:RLModel pid=1613907)[0m 2023-05-19 12:36:57,455	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1613907)[0m 2023-05-19 12:36:57,455	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 493.1542057991028, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1614167)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1614167)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1614167)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1614168)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Try #1: Total reward: 8.717051454594333
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Try #2: Total reward: 7.955663041203337
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Try #3: Total reward: 6.05481636430226
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Try #4: Total reward: 4.0458914189615145
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Try #5: Total reward: 1.792836075844456
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Try #6: Total reward: 10.836884386632578
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Try #7: Total reward: 1.8570134948256323
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Try #8: Total reward: 6.111208606246852
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Try #9: Total reward: 0.7840214169652323
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Try #10: Total reward: 7.6208109190419915
[2m[36m(ServeReplica:RLModel pid=1613907)[0m > Try #11: Total reward: 2.621747882476829
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:37:13,413 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:37:15,557 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1614291)[0m 2023-05-19 12:37:16,753	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1614291)[0m 2023-05-19 12:37:16,753	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1614327)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1614

[2m[36m(ServeReplica:RLModel pid=1614291)[0m > Algorithm PPO with humanoid env for stand_param_10 task has been build.
[2m[36m(ServeReplica:RLModel pid=1614291)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1614291)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1614291)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1614291)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '132.0'.
[2m[36m(ServeReplica:RLModel pid=1614291)[0m > Try #1: Total reward: 0.3961946339316675
[2m[36m(ServeReplica:RLModel pid=1614291)[0m > Try #2: Total reward: 5.49765525100298
[2m[36m(ServeReplica:RLModel pid=1614291)[0m > Try #3: Total reward: 3.497550624184545
[2m[36m(ServeReplica:RLModel pid=1614291)[0m > Try #4: Total reward: 1.11649329523359
[2m[36m(ServeReplica:RLModel pid=1614291)[0m > Try #5: Total reward: 1.815748768337785
[2m[36m(ServeReplica:RLModel pid=1614291)[0m > Try #6: Total reward: 6.292841569540966
[2m[36m(

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:37:36,502 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:37:38,644 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1614675)[0m 2023-05-19 12:37:39,837	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1614675)[0m 2023-05-19 12:37:39,838	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1614716)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1614

[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Algorithm PPO with humanoid env for stand_param_10 task has been build.
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1614675)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '132.0'.


[2m[36m(ServeReplica:RLModel pid=1614675)[0m 2023-05-19 12:37:43,568	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1614675)[0m 2023-05-19 12:37:43,569	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 431.06680631637573, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1614937)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1614937)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1614937)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1614938)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Try #1: Total reward: 2.549110441106729
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Try #2: Total reward: 2.3371995739345057
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Try #3: Total reward: 1.5314160082283792
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Try #4: Total reward: 7.5426164712730595
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Try #5: Total reward: 4.242996117651099
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Try #6: Total reward: 1.0428378349932734
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Try #7: Total reward: 6.995031503268602
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Try #8: Total reward: 5.925858938719445
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Try #9: Total reward: 3.4357663179788007
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Try #10: Total reward: 8.07538740279429
[2m[36m(ServeReplica:RLModel pid=1614675)[0m > Try #11: Total reward: 0.20254387603767723
[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:37:59,526 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:38:01,667 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1615067)[0m 2023-05-19 12:38:02,849	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1615067)[0m 2023-05-19 12:38:02,849	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1615102)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1615

[2m[36m(ServeReplica:RLModel pid=1615067)[0m > Algorithm PPO with humanoid env for stand_param_10 task has been build.
[2m[36m(ServeReplica:RLModel pid=1615067)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1615067)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1615067)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1615067)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '132.0'.
[2m[36m(ServeReplica:RLModel pid=1615067)[0m > Try #1: Total reward: 6.296568163655376
[2m[36m(ServeReplica:RLModel pid=1615067)[0m > Try #2: Total reward: 4.1668038006317
[2m[36m(ServeReplica:RLModel pid=1615067)[0m > Try #3: Total reward: 6.605861339625159
[2m[36m(ServeReplica:RLModel pid=1615067)[0m > Try #4: Total reward: 3.08400008593626
[2m[36m(ServeReplica:RLModel pid=1615067)[0m > Try #5: Total reward: 5.4908048811786045
[2m[36m(ServeReplica:RLModel pid=1615067)[0m > Try #6: Total reward: 6.35561760957526
[2m[36m(Se

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:38:22,626 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:38:24,764 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1615454)[0m 2023-05-19 12:38:25,942	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1615454)[0m 2023-05-19 12:38:25,943	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1615492)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1615

[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Algorithm PPO with humanoid env for stand_param_10 task has been build.
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1615454)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '132.0'.


[2m[36m(ServeReplica:RLModel pid=1615454)[0m 2023-05-19 12:38:29,762	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_15/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1615454)[0m 2023-05-19 12:38:29,762	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 468.6909394264221, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1615716)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1615716)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1615716)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1615717)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Try #1: Total reward: 4.93021861755079
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Try #2: Total reward: 3.3701512141833176
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Try #3: Total reward: 5.288666121119023
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Try #4: Total reward: 5.065487988067317
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Try #5: Total reward: 5.193036943036936
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Try #6: Total reward: 1.4283348689033486
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Try #7: Total reward: 1.8975410291309207
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Try #8: Total reward: 3.0999692579927403
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Try #9: Total reward: 2.319923383993928
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Try #10: Total reward: 8.820524895104654
[2m[36m(ServeReplica:RLModel pid=1615454)[0m > Try #11: Total reward: 4.200944358475177
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:38:45,712 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:38:47,852 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1615840)[0m 2023-05-19 12:38:49,049	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1615840)[0m 2023-05-19 12:38:49,049	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1615875)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1615

[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Algorithm PPO with humanoid env for stand_param_10 task has been build.
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1615840)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '132.0'.


[2m[36m(ServeReplica:RLModel pid=1615840)[0m 2023-05-19 12:38:52,835	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1615840)[0m 2023-05-19 12:38:52,835	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 493.1542057991028, '_episodes_total': 1000}
[2m[36m(ServeReplica:RLModel pid=1615840)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1615840)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1615840)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1616102)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Try #1: Total reward: 5.516733946400347
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Try #2: Total reward: 3.79340335873061
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Try #3: Total reward: 10.17988674776147
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Try #4: Total reward: 7.335249747138851
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Try #5: Total reward: 8.305534753911155
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Try #6: Total reward: 5.612894590329387
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Try #7: Total reward: 0.2202141913763994
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Try #8: Total reward: 5.958772759356376
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Try #9: Total reward: 10.618547870336219
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Try #10: Total reward: 4.5677940831982315
[2m[36m(ServeReplica:RLModel pid=1615840)[0m > Try #11: Total reward: 9.57791562695963
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:39:08,704 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:39:10,844 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1616224)[0m 2023-05-19 12:39:12,035	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1616224)[0m 2023-05-19 12:39:12,035	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1616259)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1616

[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Algorithm PPO with humanoid env for stand_param_11 task has been build.
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1616224)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '144.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1616224)[0m 2023-05-19 12:39:15,793	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1616224)[0m 2023-05-19 12:39:15,793	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 428.5977358818054, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1616487)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1616487)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1616487)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1616488)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Try #1: Total reward: 5.974386196128003
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Try #2: Total reward: 2.104538500175124
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Try #3: Total reward: 4.202708508618217
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Try #4: Total reward: 7.14720789122535
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Try #5: Total reward: 2.9441116808288363
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Try #6: Total reward: 9.767846366602543
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Try #7: Total reward: 5.576753887237243
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Try #8: Total reward: 7.437682012116013
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Try #9: Total reward: 9.921015947870101
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Try #10: Total reward: 5.218302632175329
[2m[36m(ServeReplica:RLModel pid=1616224)[0m > Try #11: Total reward: 5.511915187917462
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:39:31,789 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:39:33,931 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1616609)[0m 2023-05-19 12:39:35,120	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1616609)[0m 2023-05-19 12:39:35,120	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1616647)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1616

[2m[36m(ServeReplica:RLModel pid=1616609)[0m > Algorithm PPO with humanoid env for stand_param_11 task has been build.
[2m[36m(ServeReplica:RLModel pid=1616609)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1616609)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1616609)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1616609)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '144.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1616609)[0m > Try #1: Total reward: 10.294832296500209
[2m[36m(ServeReplica:RLModel pid=1616609)[0m > Try #2: Total reward: 8.211293324434715
[2m[36m(ServeReplica:RLModel pid=1616609)[0m > Try #3: Total reward: 5.992060379364204
[2m[36m(ServeReplica:RLModel pid=1616609)[0m > Try #4: Total reward: 1.106441362488954
[2m[36m(ServeReplica:RLModel pid=1616609)[0m > Try #5: Total reward: 3.289318482641493
[2m[36m(ServeReplica:RLModel pid=1616609)[0m > Try #6: Total reward: 5.30689353446

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:39:54,886 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:39:57,025 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1616995)[0m 2023-05-19 12:39:58,221	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1616995)[0m 2023-05-19 12:39:58,221	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1617031)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1617

[2m[36m(ServeReplica:RLModel pid=1616995)[0m > Algorithm PPO with humanoid env for stand_param_11 task has been build.
[2m[36m(ServeReplica:RLModel pid=1616995)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1616995)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1616995)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1616995)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '144.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1616995)[0m > Try #1: Total reward: 5.21397125602064
[2m[36m(ServeReplica:RLModel pid=1616995)[0m > Try #2: Total reward: 2.111890946520464
[2m[36m(ServeReplica:RLModel pid=1616995)[0m > Try #3: Total reward: 7.587040841876663
[2m[36m(ServeReplica:RLModel pid=1616995)[0m > Try #4: Total reward: 4.524191028071969
[2m[36m(ServeReplica:RLModel pid=1616995)[0m > Try #5: Total reward: 4.239844832052387
[2m[36m(ServeReplica:RLModel pid=1616995)[0m > Try #6: Total reward: 6.9632582537933

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:40:17,976 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:40:20,117 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1617381)[0m 2023-05-19 12:40:21,321	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1617381)[0m 2023-05-19 12:40:21,321	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1617420)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1617

[2m[36m(ServeReplica:RLModel pid=1617381)[0m > Algorithm PPO with humanoid env for stand_param_11 task has been build.
[2m[36m(ServeReplica:RLModel pid=1617381)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1617381)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1617381)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1617381)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '144.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1617381)[0m > Try #1: Total reward: 11.027656064096377
[2m[36m(ServeReplica:RLModel pid=1617381)[0m > Try #2: Total reward: 7.786892346812542
[2m[36m(ServeReplica:RLModel pid=1617381)[0m > Try #3: Total reward: 4.049644675782473
[2m[36m(ServeReplica:RLModel pid=1617381)[0m > Try #4: Total reward: 2.728812341449143
[2m[36m(ServeReplica:RLModel pid=1617381)[0m > Try #5: Total reward: 4.198894800864608
[2m[36m(ServeReplica:RLModel pid=1617381)[0m > Try #6: Total reward: 4.02565969668

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:40:40,976 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:40:43,125 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1617765)[0m 2023-05-19 12:40:44,327	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1617765)[0m 2023-05-19 12:40:44,328	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1617805)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1617

[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Algorithm PPO with humanoid env for stand_param_11 task has been build.
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1617765)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '144.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1617765)[0m 2023-05-19 12:40:48,098	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1617765)[0m 2023-05-19 12:40:48,098	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 493.1542057991028, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1618025)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1618025)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1618025)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1618026)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Try #1: Total reward: 4.339848346064604
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Try #2: Total reward: 4.721548329280876
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Try #3: Total reward: 2.7675220789087205
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Try #4: Total reward: 0.3621870395244486
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Try #5: Total reward: 1.3858880927801642
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Try #6: Total reward: 2.3420839462704577
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Try #7: Total reward: 5.114144795601809
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Try #8: Total reward: 0.10413105607212991
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Try #9: Total reward: 6.755706820710819
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Try #10: Total reward: 3.453498288925576
[2m[36m(ServeReplica:RLModel pid=1617765)[0m > Try #11: Total reward: 6.532075835330863
[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:41:04,078 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:41:06,223 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1618148)[0m 2023-05-19 12:41:07,416	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1618148)[0m 2023-05-19 12:41:07,417	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1618184)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1618

[2m[36m(ServeReplica:RLModel pid=1618148)[0m > Algorithm PPO with humanoid env for stand_param_12 task has been build.
[2m[36m(ServeReplica:RLModel pid=1618148)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1618148)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1618148)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1618148)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '156.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1618148)[0m > Try #1: Total reward: 8.186709851739057
[2m[36m(ServeReplica:RLModel pid=1618148)[0m > Try #2: Total reward: 4.818965180762226
[2m[36m(ServeReplica:RLModel pid=1618148)[0m > Try #3: Total reward: 0.6732238119994562
[2m[36m(ServeReplica:RLModel pid=1618148)[0m > Try #4: Total reward: 1.7713781789830556
[2m[36m(ServeReplica:RLModel pid=1618148)[0m > Try #5: Total reward: 7.280751586140658
[2m[36m(ServeReplica:RLModel pid=1618148)[0m > Try #6: Total reward: 1.3486597490

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:41:27,174 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:41:29,316 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1618531)[0m 2023-05-19 12:41:30,516	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1618531)[0m 2023-05-19 12:41:30,516	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1618567)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1618

[2m[36m(ServeReplica:RLModel pid=1618531)[0m > Algorithm PPO with humanoid env for stand_param_12 task has been build.
[2m[36m(ServeReplica:RLModel pid=1618531)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1618531)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1618531)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1618531)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '156.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1618531)[0m > Try #1: Total reward: 8.426100109593687
[2m[36m(ServeReplica:RLModel pid=1618531)[0m > Try #2: Total reward: 6.805724478169852
[2m[36m(ServeReplica:RLModel pid=1618531)[0m > Try #3: Total reward: 6.96458559336245
[2m[36m(ServeReplica:RLModel pid=1618531)[0m > Try #4: Total reward: 6.613206325558661
[2m[36m(ServeReplica:RLModel pid=1618531)[0m > Try #5: Total reward: 4.06944578461324
[2m[36m(ServeReplica:RLModel pid=1618531)[0m > Try #6: Total reward: 1.31332268836436

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:41:51,172 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:41:53,309 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1618917)[0m 2023-05-19 12:41:54,498	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1618917)[0m 2023-05-19 12:41:54,499	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1618952)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1618

[2m[36m(ServeReplica:RLModel pid=1618917)[0m > Algorithm PPO with humanoid env for stand_param_12 task has been build.
[2m[36m(ServeReplica:RLModel pid=1618917)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1618917)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1618917)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1618917)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '156.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1618917)[0m > Try #1: Total reward: 6.0116867927900355
[2m[36m(ServeReplica:RLModel pid=1618917)[0m > Try #2: Total reward: 1.777806756164015
[2m[36m(ServeReplica:RLModel pid=1618917)[0m > Try #3: Total reward: 2.1959113276602915
[2m[36m(ServeReplica:RLModel pid=1618917)[0m > Try #4: Total reward: 10.794475514113886
[2m[36m(ServeReplica:RLModel pid=1618917)[0m > Try #5: Total reward: 3.476397959910925
[2m[36m(ServeReplica:RLModel pid=1618917)[0m > Try #6: Total reward: 2.885612590

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:42:14,261 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:42:16,403 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1619300)[0m 2023-05-19 12:42:17,590	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1619300)[0m 2023-05-19 12:42:17,590	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1619340)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1619

[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Algorithm PPO with humanoid env for stand_param_12 task has been build.
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1619300)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '156.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1619300)[0m 2023-05-19 12:42:21,350	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_15/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1619300)[0m 2023-05-19 12:42:21,350	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 468.6909394264221, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1619560)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1619560)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1619560)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1619561)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Try #1: Total reward: 1.078701488761352
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Try #2: Total reward: 9.184431744787924
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Try #3: Total reward: 4.085592549239911
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Try #4: Total reward: 9.241809051890206
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Try #5: Total reward: 8.579486446512234
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Try #6: Total reward: 0.07327231132161123
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Try #7: Total reward: 9.1298841591167
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Try #8: Total reward: 8.054759751748232
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Try #9: Total reward: 5.822654986488434
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Try #10: Total reward: 7.312337048379422
[2m[36m(ServeReplica:RLModel pid=1619300)[0m > Try #11: Total reward: 4.723250120460765
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:42:37,345 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:42:39,488 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1619682)[0m 2023-05-19 12:42:40,690	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1619682)[0m 2023-05-19 12:42:40,690	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1619720)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1619

[2m[36m(ServeReplica:RLModel pid=1619682)[0m > Algorithm PPO with humanoid env for stand_param_12 task has been build.
[2m[36m(ServeReplica:RLModel pid=1619682)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1619682)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1619682)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1619682)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '156.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1619682)[0m > Try #1: Total reward: 8.578237203474547
[2m[36m(ServeReplica:RLModel pid=1619682)[0m > Try #2: Total reward: 1.9737935414503036
[2m[36m(ServeReplica:RLModel pid=1619682)[0m > Try #3: Total reward: 1.1052012741954025
[2m[36m(ServeReplica:RLModel pid=1619682)[0m > Try #4: Total reward: 4.988156380481505
[2m[36m(ServeReplica:RLModel pid=1619682)[0m > Try #5: Total reward: 13.59324731620877
[2m[36m(ServeReplica:RLModel pid=1619682)[0m > Try #6: Total reward: 7.0556449562

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:43:00,446 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:43:02,590 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1620068)[0m 2023-05-19 12:43:03,779	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1620068)[0m 2023-05-19 12:43:03,779	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1620110)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1620

[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Algorithm PPO with humanoid env for stand_param_13 task has been build.
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1620068)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '168.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1620068)[0m 2023-05-19 12:43:07,614	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1620068)[0m 2023-05-19 12:43:07,614	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 428.5977358818054, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1620329)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1620329)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1620329)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1620330)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Try #1: Total reward: 3.252744519383853
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Try #2: Total reward: 8.805537342772293
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Try #3: Total reward: 3.113575208883143
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Try #4: Total reward: 0.18660001096677786
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Try #5: Total reward: 2.6393567475753787
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Try #6: Total reward: 4.176495947200034
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Try #7: Total reward: 3.1129037211755515
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Try #8: Total reward: 2.701334872863926
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Try #9: Total reward: 3.175278243336064
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Try #10: Total reward: 3.440537250085985
[2m[36m(ServeReplica:RLModel pid=1620068)[0m > Try #11: Total reward: 4.7096846519170095
[2

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:43:23,449 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:43:25,591 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1620452)[0m 2023-05-19 12:43:26,795	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1620452)[0m 2023-05-19 12:43:26,795	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1620492)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1620

[2m[36m(ServeReplica:RLModel pid=1620452)[0m > Algorithm PPO with humanoid env for stand_param_13 task has been build.
[2m[36m(ServeReplica:RLModel pid=1620452)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1620452)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1620452)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1620452)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '168.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1620452)[0m > Try #1: Total reward: 10.558246139987096
[2m[36m(ServeReplica:RLModel pid=1620452)[0m > Try #2: Total reward: 2.6561614053850184
[2m[36m(ServeReplica:RLModel pid=1620452)[0m > Try #3: Total reward: 2.8314881623737596
[2m[36m(ServeReplica:RLModel pid=1620452)[0m > Try #4: Total reward: 6.453147417164243
[2m[36m(ServeReplica:RLModel pid=1620452)[0m > Try #5: Total reward: 4.031516452513728
[2m[36m(ServeReplica:RLModel pid=1620452)[0m > Try #6: Total reward: 0.865051086

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:43:46,540 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:43:48,679 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1620837)[0m 2023-05-19 12:43:49,864	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1620837)[0m 2023-05-19 12:43:49,864	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1620880)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1620

[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Algorithm PPO with humanoid env for stand_param_13 task has been build.
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1620837)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '168.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1620837)[0m 2023-05-19 12:43:53,765	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_10/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1620837)[0m 2023-05-19 12:43:53,765	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 432.3389937877655, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1621099)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1621099)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1621099)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1621100)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Try #1: Total reward: 0.40555491066291466
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Try #2: Total reward: 10.71244852719903
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Try #3: Total reward: 2.0350601244736524
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Try #4: Total reward: 1.6111564677910004
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Try #5: Total reward: 2.632656803951362
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Try #6: Total reward: 5.82976400361464
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Try #7: Total reward: 1.4438689189908023
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Try #8: Total reward: 9.48256889419884
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Try #9: Total reward: 2.777703953794384
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Try #10: Total reward: 4.499738056981747
[2m[36m(ServeReplica:RLModel pid=1620837)[0m > Try #11: Total reward: 6.354861480656855
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:44:09,632 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:44:11,775 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1621222)[0m 2023-05-19 12:44:12,964	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1621222)[0m 2023-05-19 12:44:12,964	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1621257)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1621

[2m[36m(ServeReplica:RLModel pid=1621222)[0m > Algorithm PPO with humanoid env for stand_param_13 task has been build.
[2m[36m(ServeReplica:RLModel pid=1621222)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1621222)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1621222)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1621222)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '168.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1621222)[0m > Try #1: Total reward: 5.559184739307102
[2m[36m(ServeReplica:RLModel pid=1621222)[0m > Try #2: Total reward: 5.574757472041053
[2m[36m(ServeReplica:RLModel pid=1621222)[0m > Try #3: Total reward: 3.129401374236839
[2m[36m(ServeReplica:RLModel pid=1621222)[0m > Try #4: Total reward: 5.997263575805974
[2m[36m(ServeReplica:RLModel pid=1621222)[0m > Try #5: Total reward: 7.054004394023591
[2m[36m(ServeReplica:RLModel pid=1621222)[0m > Try #6: Total reward: 1.459058730080

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:44:32,710 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:44:34,853 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1621607)[0m 2023-05-19 12:44:36,052	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1621607)[0m 2023-05-19 12:44:36,052	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1621645)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1621

[2m[36m(ServeReplica:RLModel pid=1621607)[0m > Algorithm PPO with humanoid env for stand_param_13 task has been build.
[2m[36m(ServeReplica:RLModel pid=1621607)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1621607)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1621607)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1621607)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '168.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1621607)[0m > Try #1: Total reward: 2.4760209731056766
[2m[36m(ServeReplica:RLModel pid=1621607)[0m > Try #2: Total reward: 5.7152709285055945
[2m[36m(ServeReplica:RLModel pid=1621607)[0m > Try #3: Total reward: 9.186448250262911
[2m[36m(ServeReplica:RLModel pid=1621607)[0m > Try #4: Total reward: 6.106905379302463
[2m[36m(ServeReplica:RLModel pid=1621607)[0m > Try #5: Total reward: 7.057600552419408
[2m[36m(ServeReplica:RLModel pid=1621607)[0m > Try #6: Total reward: 12.127931119

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:44:55,786 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:44:57,927 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1621989)[0m 2023-05-19 12:44:59,118	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1621989)[0m 2023-05-19 12:44:59,119	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1622027)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1622

[2m[36m(ServeReplica:RLModel pid=1621989)[0m > Algorithm PPO with humanoid env for stand_param_14 task has been build.
[2m[36m(ServeReplica:RLModel pid=1621989)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1621989)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1621989)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1621989)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '180.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1621989)[0m > Try #1: Total reward: 3.9921639015247865
[2m[36m(ServeReplica:RLModel pid=1621989)[0m > Try #2: Total reward: 4.380388160252817
[2m[36m(ServeReplica:RLModel pid=1621989)[0m > Try #3: Total reward: 9.978172515012446
[2m[36m(ServeReplica:RLModel pid=1621989)[0m > Try #4: Total reward: 2.0043805941829587
[2m[36m(ServeReplica:RLModel pid=1621989)[0m > Try #5: Total reward: 0.63329853134563
[2m[36m(ServeReplica:RLModel pid=1621989)[0m > Try #6: Total reward: 5.66593542080

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:45:18,782 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:45:20,925 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1622373)[0m 2023-05-19 12:45:22,122	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1622373)[0m 2023-05-19 12:45:22,122	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1622407)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1622

[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Algorithm PPO with humanoid env for stand_param_14 task has been build.
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1622373)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '180.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1622373)[0m 2023-05-19 12:45:25,916	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1622373)[0m 2023-05-19 12:45:25,916	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 431.06680631637573, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1622634)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1622634)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1622634)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1622635)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Try #1: Total reward: 5.948071397618465
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Try #2: Total reward: 4.634912655030337
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Try #3: Total reward: 4.062451629426056
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Try #4: Total reward: 6.856608030193192
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Try #5: Total reward: 6.51550799379543
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Try #6: Total reward: 5.722985333032216
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Try #7: Total reward: 4.697934828844108
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Try #8: Total reward: 4.640770010771835
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Try #9: Total reward: 0.4741751089388715
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Try #10: Total reward: 0.6109898482590922
[2m[36m(ServeReplica:RLModel pid=1622373)[0m > Try #11: Total reward: 7.631121747912882
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:45:41,886 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:45:44,029 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1622757)[0m 2023-05-19 12:45:45,222	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1622757)[0m 2023-05-19 12:45:45,222	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1622798)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1622

[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Algorithm PPO with humanoid env for stand_param_14 task has been build.
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1622757)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '180.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1622757)[0m 2023-05-19 12:45:49,029	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_10/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1622757)[0m 2023-05-19 12:45:49,029	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 432.3389937877655, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1623018)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1623018)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1623018)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1623017)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Try #1: Total reward: 6.392494286088757
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Try #2: Total reward: 7.033970993491773
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Try #3: Total reward: 8.893620955429004
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Try #4: Total reward: 10.863225812123197
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Try #5: Total reward: 0.6334258090379374
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Try #6: Total reward: 5.225606594780425
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Try #7: Total reward: 5.594076590805864
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Try #8: Total reward: 7.991935681526788
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Try #9: Total reward: 4.850214809229402
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Try #10: Total reward: 6.679186448932006
[2m[36m(ServeReplica:RLModel pid=1622757)[0m > Try #11: Total reward: 0.9557915858139703
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:46:04,977 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:46:07,117 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1623141)[0m 2023-05-19 12:46:08,309	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1623141)[0m 2023-05-19 12:46:08,310	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1623177)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1623

[2m[36m(ServeReplica:RLModel pid=1623141)[0m > Algorithm PPO with humanoid env for stand_param_14 task has been build.
[2m[36m(ServeReplica:RLModel pid=1623141)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1623141)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1623141)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1623141)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '180.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1623141)[0m > Try #1: Total reward: 5.585703025456065
[2m[36m(ServeReplica:RLModel pid=1623141)[0m > Try #2: Total reward: 5.135728222518469
[2m[36m(ServeReplica:RLModel pid=1623141)[0m > Try #3: Total reward: 2.0977277309892357
[2m[36m(ServeReplica:RLModel pid=1623141)[0m > Try #4: Total reward: 10.571664770092777
[2m[36m(ServeReplica:RLModel pid=1623141)[0m > Try #5: Total reward: 3.657620773834986
[2m[36m(ServeReplica:RLModel pid=1623141)[0m > Try #6: Total reward: 5.6319940074

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:46:27,966 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:46:30,106 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1623525)[0m 2023-05-19 12:46:31,297	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1623525)[0m 2023-05-19 12:46:31,297	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1623564)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1623

[2m[36m(ServeReplica:RLModel pid=1623525)[0m > Algorithm PPO with humanoid env for stand_param_14 task has been build.
[2m[36m(ServeReplica:RLModel pid=1623525)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1623525)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1623525)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1623525)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '180.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1623525)[0m > Try #1: Total reward: 7.399056920388954
[2m[36m(ServeReplica:RLModel pid=1623525)[0m > Try #2: Total reward: 6.286193412996852
[2m[36m(ServeReplica:RLModel pid=1623525)[0m > Try #3: Total reward: 1.6751772161752587
[2m[36m(ServeReplica:RLModel pid=1623525)[0m > Try #4: Total reward: 5.085999721928971
[2m[36m(ServeReplica:RLModel pid=1623525)[0m > Try #5: Total reward: 2.5444927842711214
[2m[36m(ServeReplica:RLModel pid=1623525)[0m > Try #6: Total reward: 6.4799057829

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:46:51,067 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:46:53,210 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1623911)[0m 2023-05-19 12:46:54,411	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1623911)[0m 2023-05-19 12:46:54,412	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1623948)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1623

[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Algorithm PPO with humanoid env for stand_param_15 task has been build.
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1623911)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '192.0'.


[2m[36m(ServeReplica:RLModel pid=1623911)[0m 2023-05-19 12:46:58,231	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1623911)[0m 2023-05-19 12:46:58,231	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 428.5977358818054, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1624172)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1624172)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1624172)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1624173)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Try #1: Total reward: 0.2355837576586587
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Try #2: Total reward: 9.033706048353034
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Try #3: Total reward: 4.526230057018518
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Try #4: Total reward: 6.913101409182667
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Try #5: Total reward: 4.702353449445334
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Try #6: Total reward: 6.913117223775071
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Try #7: Total reward: 1.407444058383228
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Try #8: Total reward: 10.582859665082507
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Try #9: Total reward: 7.545006515299189
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Try #10: Total reward: 6.6796837734935774
[2m[36m(ServeReplica:RLModel pid=1623911)[0m > Try #11: Total reward: 6.43340562834148
[2m[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:47:14,165 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:47:16,305 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1624295)[0m 2023-05-19 12:47:17,496	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1624295)[0m 2023-05-19 12:47:17,496	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1624329)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1624

[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Algorithm PPO with humanoid env for stand_param_15 task has been build.
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1624295)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '192.0'.


[2m[36m(ServeReplica:RLModel pid=1624295)[0m 2023-05-19 12:47:21,285	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1624295)[0m 2023-05-19 12:47:21,285	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 431.06680631637573, '_episodes_total': 1000}
[2m[36m(ServeReplica:RLModel pid=1624295)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1624295)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1624295)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1624559)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Try #1: Total reward: 0.09279432565456196
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Try #2: Total reward: 3.675608268945118
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Try #3: Total reward: 7.372514625645876
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Try #4: Total reward: 4.103761351344157
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Try #5: Total reward: 10.671818837945397
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Try #6: Total reward: 1.7076402305525664
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Try #7: Total reward: 1.338413408735122
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Try #8: Total reward: 3.696933961463497
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Try #9: Total reward: 4.217053238649199
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Try #10: Total reward: 5.564072547009149
[2m[36m(ServeReplica:RLModel pid=1624295)[0m > Try #11: Total reward: 9.675780051167088
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:47:38,263 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:47:40,401 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1624684)[0m 2023-05-19 12:47:41,603	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1624684)[0m 2023-05-19 12:47:41,604	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1624718)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1624

[2m[36m(ServeReplica:RLModel pid=1624684)[0m > Algorithm PPO with humanoid env for stand_param_15 task has been build.
[2m[36m(ServeReplica:RLModel pid=1624684)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1624684)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1624684)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1624684)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '192.0'.
[2m[36m(ServeReplica:RLModel pid=1624684)[0m > Try #1: Total reward: 2.4087500485253477
[2m[36m(ServeReplica:RLModel pid=1624684)[0m > Try #2: Total reward: 8.218497071654994
[2m[36m(ServeReplica:RLModel pid=1624684)[0m > Try #3: Total reward: 8.536561787102215
[2m[36m(ServeReplica:RLModel pid=1624684)[0m > Try #4: Total reward: 3.961748470333853
[2m[36m(ServeReplica:RLModel pid=1624684)[0m > Try #5: Total reward: 5.289717022989029
[2m[36m(ServeReplica:RLModel pid=1624684)[0m > Try #6: Total reward: 4.249658587808687
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:48:01,319 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:48:03,460 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1625076)[0m 2023-05-19 12:48:04,650	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1625076)[0m 2023-05-19 12:48:04,650	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1625110)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1625

[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Algorithm PPO with humanoid env for stand_param_15 task has been build.
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1625076)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '192.0'.


[2m[36m(ServeReplica:RLModel pid=1625076)[0m 2023-05-19 12:48:08,471	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_15/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1625076)[0m 2023-05-19 12:48:08,471	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 468.6909394264221, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1625336)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1625336)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1625336)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1625337)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Try #1: Total reward: 2.6721585819526688
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Try #2: Total reward: 0.707178042835595
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Try #3: Total reward: 2.256364104266184
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Try #4: Total reward: 3.127743553983318
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Try #5: Total reward: 9.026124709440001
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Try #6: Total reward: 5.956861603091252
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Try #7: Total reward: 9.54805055920713
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Try #8: Total reward: 5.541719858739197
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Try #9: Total reward: 4.337051900942085
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Try #10: Total reward: 3.769356717878768
[2m[36m(ServeReplica:RLModel pid=1625076)[0m > Try #11: Total reward: 3.602180084959452
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:48:25,339 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:48:27,478 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1625464)[0m 2023-05-19 12:48:28,666	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1625464)[0m 2023-05-19 12:48:28,667	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1625503)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1625

[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Algorithm PPO with humanoid env for stand_param_15 task has been build.
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1625464)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '192.0'.


[2m[36m(ServeReplica:RLModel pid=1625464)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1625464)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1625464)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Try #1: Total reward: 9.561157952960325
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Try #2: Total reward: 4.107033492685245
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Try #3: Total reward: 9.082349848998499
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Try #4: Total reward: 3.9474603008630686
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Try #5: Total reward: 3.937201305967785
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Try #6: Total reward: 0.30765463330059284
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Try #7: Total reward: 9.71101434127789
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Try #8: Total reward: 6.562069664388874
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Try #9: Total reward: 8.901371633689998
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Try #10: Total reward: 4.0652758117837084
[2m[36m(ServeReplica:RLModel pid=1625464)[0m > Try #11: Total reward: 8.422605760723204
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:48:48,421 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:48:50,563 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1625851)[0m 2023-05-19 12:48:51,758	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1625851)[0m 2023-05-19 12:48:51,758	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1625892)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1625

[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Algorithm PPO with humanoid env for stand_param_16 task has been build.
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1625851)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '204.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1625851)[0m 2023-05-19 12:48:55,505	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1625851)[0m 2023-05-19 12:48:55,505	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 428.5977358818054, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1626111)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1626111)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1626111)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1626110)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Try #1: Total reward: 7.90902415546254
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Try #2: Total reward: 2.964287743449864
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Try #3: Total reward: 2.681845061886541
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Try #4: Total reward: 3.6718231684386993
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Try #5: Total reward: 1.0592605346270405
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Try #6: Total reward: 4.431637080815034
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Try #7: Total reward: 2.9843578259908505
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Try #8: Total reward: 1.2776697026951889
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Try #9: Total reward: 6.631869515229109
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Try #10: Total reward: 6.887275619073003
[2m[36m(ServeReplica:RLModel pid=1625851)[0m > Try #11: Total reward: 3.874585852662283
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:49:11,513 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:49:13,657 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1626233)[0m 2023-05-19 12:49:14,854	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1626233)[0m 2023-05-19 12:49:14,854	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1626273)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1626

[2m[36m(ServeReplica:RLModel pid=1626233)[0m > Algorithm PPO with humanoid env for stand_param_16 task has been build.
[2m[36m(ServeReplica:RLModel pid=1626233)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1626233)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1626233)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1626233)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '204.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1626233)[0m > Try #1: Total reward: 5.937109814208842
[2m[36m(ServeReplica:RLModel pid=1626233)[0m > Try #2: Total reward: 3.352135857454633
[2m[36m(ServeReplica:RLModel pid=1626233)[0m > Try #3: Total reward: 1.5122889935497834
[2m[36m(ServeReplica:RLModel pid=1626233)[0m > Try #4: Total reward: 7.789150869137811
[2m[36m(ServeReplica:RLModel pid=1626233)[0m > Try #5: Total reward: 9.114012886251333
[2m[36m(ServeReplica:RLModel pid=1626233)[0m > Try #6: Total reward: 1.89947170353

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:49:34,603 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:49:36,743 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1626619)[0m 2023-05-19 12:49:37,934	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1626619)[0m 2023-05-19 12:49:37,935	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1626656)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1626

[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Algorithm PPO with humanoid env for stand_param_16 task has been build.
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1626619)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '204.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1626619)[0m 2023-05-19 12:49:41,721	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_10/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1626619)[0m 2023-05-19 12:49:41,721	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 432.3389937877655, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1626882)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1626882)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1626882)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1626881)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Try #1: Total reward: 2.5484668124662297
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Try #2: Total reward: 8.844196824428636
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Try #3: Total reward: 0.7807239214024728
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Try #4: Total reward: 9.108581191350615
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Try #5: Total reward: 9.209639872197046
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Try #6: Total reward: 2.6204554767043127
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Try #7: Total reward: 3.3051334906730174
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Try #8: Total reward: 6.5189395943235535
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Try #9: Total reward: 6.934484794083803
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Try #10: Total reward: 9.456845480307052
[2m[36m(ServeReplica:RLModel pid=1626619)[0m > Try #11: Total reward: 9.871009091683867
[2

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:49:57,680 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:49:59,823 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1627003)[0m 2023-05-19 12:50:01,014	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1627003)[0m 2023-05-19 12:50:01,015	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1627039)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1627

[2m[36m(ServeReplica:RLModel pid=1627003)[0m > Algorithm PPO with humanoid env for stand_param_16 task has been build.
[2m[36m(ServeReplica:RLModel pid=1627003)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1627003)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1627003)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1627003)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '204.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1627003)[0m > Try #1: Total reward: 2.1004693546430726
[2m[36m(ServeReplica:RLModel pid=1627003)[0m > Try #2: Total reward: 7.962349210592188
[2m[36m(ServeReplica:RLModel pid=1627003)[0m > Try #3: Total reward: 3.118520109319926
[2m[36m(ServeReplica:RLModel pid=1627003)[0m > Try #4: Total reward: 4.476726109601126
[2m[36m(ServeReplica:RLModel pid=1627003)[0m > Try #5: Total reward: 6.473505143840408
[2m[36m(ServeReplica:RLModel pid=1627003)[0m > Try #6: Total reward: 7.71663528928

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:50:20,682 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:50:22,821 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1627386)[0m 2023-05-19 12:50:24,009	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1627386)[0m 2023-05-19 12:50:24,009	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1627426)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1627

[2m[36m(ServeReplica:RLModel pid=1627386)[0m > Algorithm PPO with humanoid env for stand_param_16 task has been build.
[2m[36m(ServeReplica:RLModel pid=1627386)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1627386)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1627386)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1627386)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '204.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1627386)[0m > Try #1: Total reward: 2.1336280107284575
[2m[36m(ServeReplica:RLModel pid=1627386)[0m > Try #2: Total reward: 3.622566282210119
[2m[36m(ServeReplica:RLModel pid=1627386)[0m > Try #3: Total reward: 0.6078282730531706
[2m[36m(ServeReplica:RLModel pid=1627386)[0m > Try #4: Total reward: 1.5447725913253625
[2m[36m(ServeReplica:RLModel pid=1627386)[0m > Try #5: Total reward: 2.830463746137273
[2m[36m(ServeReplica:RLModel pid=1627386)[0m > Try #6: Total reward: 2.139332354

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:50:43,756 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:50:45,897 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1627770)[0m 2023-05-19 12:50:47,085	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1627770)[0m 2023-05-19 12:50:47,086	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1627811)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1627

[2m[36m(ServeReplica:RLModel pid=1627770)[0m > Algorithm PPO with humanoid env for stand_param_17 task has been build.
[2m[36m(ServeReplica:RLModel pid=1627770)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1627770)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1627770)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1627770)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '216.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1627770)[0m > Try #1: Total reward: 8.87326806504848
[2m[36m(ServeReplica:RLModel pid=1627770)[0m > Try #2: Total reward: 6.839263022855389
[2m[36m(ServeReplica:RLModel pid=1627770)[0m > Try #3: Total reward: 7.213465041915861
[2m[36m(ServeReplica:RLModel pid=1627770)[0m > Try #4: Total reward: 4.681505522380236
[2m[36m(ServeReplica:RLModel pid=1627770)[0m > Try #5: Total reward: 4.196151610873205
[2m[36m(ServeReplica:RLModel pid=1627770)[0m > Try #6: Total reward: 9.1489755356872

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:51:06,850 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:51:08,994 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1628153)[0m 2023-05-19 12:51:10,188	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1628153)[0m 2023-05-19 12:51:10,188	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1628191)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1628

[2m[36m(ServeReplica:RLModel pid=1628153)[0m > Algorithm PPO with humanoid env for stand_param_17 task has been build.
[2m[36m(ServeReplica:RLModel pid=1628153)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1628153)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1628153)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1628153)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '216.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1628153)[0m > Try #1: Total reward: 2.2218377155345097
[2m[36m(ServeReplica:RLModel pid=1628153)[0m > Try #2: Total reward: 5.642047735012333
[2m[36m(ServeReplica:RLModel pid=1628153)[0m > Try #3: Total reward: 6.586408999205503
[2m[36m(ServeReplica:RLModel pid=1628153)[0m > Try #4: Total reward: 3.7082938559672094
[2m[36m(ServeReplica:RLModel pid=1628153)[0m > Try #5: Total reward: 4.849447368232568
[2m[36m(ServeReplica:RLModel pid=1628153)[0m > Try #6: Total reward: 8.3175346552

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:51:30,865 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:51:33,007 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1628541)[0m 2023-05-19 12:51:34,195	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1628541)[0m 2023-05-19 12:51:34,196	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1628578)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1628

[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Algorithm PPO with humanoid env for stand_param_17 task has been build.
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1628541)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '216.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1628541)[0m 2023-05-19 12:51:37,922	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_10/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1628541)[0m 2023-05-19 12:51:37,922	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 432.3389937877655, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1628803)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1628803)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1628803)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1628804)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Try #1: Total reward: 4.673493322711408
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Try #2: Total reward: 7.533333430472136
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Try #3: Total reward: 6.723076133517613
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Try #4: Total reward: 4.798213334088566
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Try #5: Total reward: 0.09053392931725265
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Try #6: Total reward: 4.946527019945388
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Try #7: Total reward: 7.143647814695589
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Try #8: Total reward: 7.841430515188767
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Try #9: Total reward: 5.5463171831851605
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Try #10: Total reward: 3.5600154522226273
[2m[36m(ServeReplica:RLModel pid=1628541)[0m > Try #11: Total reward: 1.2803857701681252
[2

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:51:53,950 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:51:56,092 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1628927)[0m 2023-05-19 12:51:57,287	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1628927)[0m 2023-05-19 12:51:57,287	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1628965)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1628

[2m[36m(ServeReplica:RLModel pid=1628927)[0m > Algorithm PPO with humanoid env for stand_param_17 task has been build.
[2m[36m(ServeReplica:RLModel pid=1628927)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1628927)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1628927)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1628927)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '216.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1628927)[0m > Try #1: Total reward: 3.317888362225489
[2m[36m(ServeReplica:RLModel pid=1628927)[0m > Try #2: Total reward: 6.092906519869061
[2m[36m(ServeReplica:RLModel pid=1628927)[0m > Try #3: Total reward: 7.917458101421393
[2m[36m(ServeReplica:RLModel pid=1628927)[0m > Try #4: Total reward: 3.179366575210972
[2m[36m(ServeReplica:RLModel pid=1628927)[0m > Try #5: Total reward: 1.1887396268253623
[2m[36m(ServeReplica:RLModel pid=1628927)[0m > Try #6: Total reward: 4.55542585901

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:52:17,055 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:52:19,195 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1629310)[0m 2023-05-19 12:52:20,402	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1629310)[0m 2023-05-19 12:52:20,402	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1629352)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1629

[2m[36m(ServeReplica:RLModel pid=1629310)[0m > Algorithm PPO with humanoid env for stand_param_17 task has been build.
[2m[36m(ServeReplica:RLModel pid=1629310)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1629310)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1629310)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1629310)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '216.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1629310)[0m > Try #1: Total reward: 9.3279044988935
[2m[36m(ServeReplica:RLModel pid=1629310)[0m > Try #2: Total reward: 9.306288670976818
[2m[36m(ServeReplica:RLModel pid=1629310)[0m > Try #3: Total reward: 8.653904710855809
[2m[36m(ServeReplica:RLModel pid=1629310)[0m > Try #4: Total reward: 6.172142363281918
[2m[36m(ServeReplica:RLModel pid=1629310)[0m > Try #5: Total reward: 3.1105699009294394
[2m[36m(ServeReplica:RLModel pid=1629310)[0m > Try #6: Total reward: 0.8255330949970

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:52:40,136 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:52:42,279 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1629694)[0m 2023-05-19 12:52:43,479	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1629694)[0m 2023-05-19 12:52:43,480	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1629735)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1629

[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Algorithm PPO with humanoid env for stand_param_18 task has been build.
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1629694)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '228.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1629694)[0m 2023-05-19 12:52:47,260	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1629694)[0m 2023-05-19 12:52:47,260	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 428.5977358818054, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1629958)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1629958)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1629958)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1629957)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Try #1: Total reward: 1.4226118178508511
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Try #2: Total reward: 2.0942198308803586
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Try #3: Total reward: 5.159881372176625
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Try #4: Total reward: 10.995856091549765
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Try #5: Total reward: 11.688990292828796
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Try #6: Total reward: 5.768728606070502
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Try #7: Total reward: 5.316073379912411
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Try #8: Total reward: 7.859608700206304
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Try #9: Total reward: 4.59639058236454
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Try #10: Total reward: 3.4000026680136353
[2m[36m(ServeReplica:RLModel pid=1629694)[0m > Try #11: Total reward: 10.251758796142266
[2

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:53:03,123 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:53:05,267 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1630081)[0m 2023-05-19 12:53:06,449	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1630081)[0m 2023-05-19 12:53:06,450	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1630123)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1630

[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Algorithm PPO with humanoid env for stand_param_18 task has been build.
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1630081)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '228.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1630081)[0m 2023-05-19 12:53:10,338	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1630081)[0m 2023-05-19 12:53:10,338	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 431.06680631637573, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1630343)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1630343)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1630343)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1630344)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Try #1: Total reward: 5.508330552717396
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Try #2: Total reward: 8.956745614539383
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Try #3: Total reward: 10.072755242848643
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Try #4: Total reward: 4.007471242951168
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Try #5: Total reward: 6.984274642871485
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Try #6: Total reward: 10.862688844726735
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Try #7: Total reward: 2.0031759606906014
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Try #8: Total reward: 4.920265078288656
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Try #9: Total reward: 4.111198369994727
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Try #10: Total reward: 5.997629934971816
[2m[36m(ServeReplica:RLModel pid=1630081)[0m > Try #11: Total reward: 1.5836932123502507
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:53:26,216 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:53:28,357 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1630465)[0m 2023-05-19 12:53:29,552	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1630465)[0m 2023-05-19 12:53:29,553	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1630502)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1630

[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Algorithm PPO with humanoid env for stand_param_18 task has been build.
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1630465)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '228.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1630465)[0m 2023-05-19 12:53:33,332	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_10/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1630465)[0m 2023-05-19 12:53:33,332	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 432.3389937877655, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1630729)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1630729)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1630729)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1630728)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Try #1: Total reward: 7.039624826339117
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Try #2: Total reward: 2.3508237979026396
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Try #3: Total reward: 1.1937132516810594
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Try #4: Total reward: 7.161809709585027
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Try #5: Total reward: 3.5248221437772775
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Try #6: Total reward: 0.15460844135474253
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Try #7: Total reward: 7.703111331621554
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Try #8: Total reward: 4.883259735338484
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Try #9: Total reward: 6.350481480829053
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Try #10: Total reward: 0.8030854254819749
[2m[36m(ServeReplica:RLModel pid=1630465)[0m > Try #11: Total reward: 2.212541975917434
[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:53:49,303 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:53:51,443 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1630852)[0m 2023-05-19 12:53:52,632	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1630852)[0m 2023-05-19 12:53:52,632	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1630887)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1630

[2m[36m(ServeReplica:RLModel pid=1630852)[0m > Algorithm PPO with humanoid env for stand_param_18 task has been build.
[2m[36m(ServeReplica:RLModel pid=1630852)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1630852)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1630852)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1630852)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '228.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1630852)[0m > Try #1: Total reward: 5.564998912149458
[2m[36m(ServeReplica:RLModel pid=1630852)[0m > Try #2: Total reward: 7.228361824523462
[2m[36m(ServeReplica:RLModel pid=1630852)[0m > Try #3: Total reward: 4.266319022025769
[2m[36m(ServeReplica:RLModel pid=1630852)[0m > Try #4: Total reward: 8.013926634216874
[2m[36m(ServeReplica:RLModel pid=1630852)[0m > Try #5: Total reward: 9.485936909839802
[2m[36m(ServeReplica:RLModel pid=1630852)[0m > Try #6: Total reward: 2.036818736706

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:54:12,386 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:54:14,529 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1631236)[0m 2023-05-19 12:54:15,733	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1631236)[0m 2023-05-19 12:54:15,733	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1631276)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1631

[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Algorithm PPO with humanoid env for stand_param_18 task has been build.
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1631236)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '228.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1631236)[0m 2023-05-19 12:54:19,513	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1631236)[0m 2023-05-19 12:54:19,513	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 493.1542057991028, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1631496)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1631496)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1631496)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1631497)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorke

[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Try #1: Total reward: 3.149267027404527
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Try #2: Total reward: 10.474532739473624
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Try #3: Total reward: 0.6596435297987567
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Try #4: Total reward: 1.8093754226271748
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Try #5: Total reward: 8.92313345523881
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Try #6: Total reward: 0.29732340976588284
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Try #7: Total reward: 6.470439968088177
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Try #8: Total reward: 5.8785265131242195
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Try #9: Total reward: 6.460355648197744
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Try #10: Total reward: 5.379600747063359
[2m[36m(ServeReplica:RLModel pid=1631236)[0m > Try #11: Total reward: 2.331028373586918
[2

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:54:35,483 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:54:37,623 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1631620)[0m 2023-05-19 12:54:38,826	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1631620)[0m 2023-05-19 12:54:38,826	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1631657)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1631

[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Algorithm PPO with humanoid env for stand_param_19 task has been build.
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1631620)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '240.0'.


[2m[36m(ServeReplica:RLModel pid=1631620)[0m 2023-05-19 12:54:42,565	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-stand_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1631620)[0m 2023-05-19 12:54:42,565	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 428.5977358818054, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1631882)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1631882)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1631882)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1631881)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Try #1: Total reward: 4.990965501124941
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Try #2: Total reward: 7.065375869406224
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Try #3: Total reward: 8.202387472399877
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Try #4: Total reward: 4.989992201646349
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Try #5: Total reward: 1.4018035393648645
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Try #6: Total reward: 1.5002945945783832
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Try #7: Total reward: 3.2626243654617646
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Try #8: Total reward: 8.348297491504377
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Try #9: Total reward: 1.9138110171396625
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Try #10: Total reward: 0.05310401471695357
[2m[36m(ServeReplica:RLModel pid=1631620)[0m > Try #11: Total reward: 3.5036362294684498


[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:54:58,474 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:55:00,617 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1632004)[0m 2023-05-19 12:55:01,814	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1632004)[0m 2023-05-19 12:55:01,815	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1632044)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1632

[2m[36m(ServeReplica:RLModel pid=1632004)[0m > Algorithm PPO with humanoid env for stand_param_19 task has been build.
[2m[36m(ServeReplica:RLModel pid=1632004)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1632004)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1632004)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1632004)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '240.0'.
[2m[36m(ServeReplica:RLModel pid=1632004)[0m > Try #1: Total reward: 1.414844738752534
[2m[36m(ServeReplica:RLModel pid=1632004)[0m > Try #2: Total reward: 4.405289987599986
[2m[36m(ServeReplica:RLModel pid=1632004)[0m > Try #3: Total reward: 2.094282192609237
[2m[36m(ServeReplica:RLModel pid=1632004)[0m > Try #4: Total reward: 6.947368169695453
[2m[36m(ServeReplica:RLModel pid=1632004)[0m > Try #5: Total reward: 5.5758017960400945
[2m[36m(ServeReplica:RLModel pid=1632004)[0m > Try #6: Total reward: 4.775415452682329
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:55:22,587 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:55:24,730 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1632391)[0m 2023-05-19 12:55:25,918	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1632391)[0m 2023-05-19 12:55:25,919	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1632432)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1632

[2m[36m(ServeReplica:RLModel pid=1632391)[0m > Algorithm PPO with humanoid env for stand_param_19 task has been build.
[2m[36m(ServeReplica:RLModel pid=1632391)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1632391)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1632391)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1632391)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '240.0'.
[2m[36m(ServeReplica:RLModel pid=1632391)[0m > Try #1: Total reward: 3.493186026340608
[2m[36m(ServeReplica:RLModel pid=1632391)[0m > Try #2: Total reward: 4.590223177709891
[2m[36m(ServeReplica:RLModel pid=1632391)[0m > Try #3: Total reward: 4.911514685726428
[2m[36m(ServeReplica:RLModel pid=1632391)[0m > Try #4: Total reward: 4.2151166712390005
[2m[36m(ServeReplica:RLModel pid=1632391)[0m > Try #5: Total reward: 1.0871091323844864
[2m[36m(ServeReplica:RLModel pid=1632391)[0m > Try #6: Total reward: 7.636619879902637
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:55:45,676 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:55:47,817 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1632778)[0m 2023-05-19 12:55:49,005	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1632778)[0m 2023-05-19 12:55:49,005	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1632817)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1632

[2m[36m(ServeReplica:RLModel pid=1632778)[0m > Algorithm PPO with humanoid env for stand_param_19 task has been build.
[2m[36m(ServeReplica:RLModel pid=1632778)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1632778)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1632778)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1632778)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '240.0'.
[2m[36m(ServeReplica:RLModel pid=1632778)[0m > Try #1: Total reward: 5.965394383763221
[2m[36m(ServeReplica:RLModel pid=1632778)[0m > Try #2: Total reward: 4.578007267109298
[2m[36m(ServeReplica:RLModel pid=1632778)[0m > Try #3: Total reward: 2.3688924712116926
[2m[36m(ServeReplica:RLModel pid=1632778)[0m > Try #4: Total reward: 0.782090088612461
[2m[36m(ServeReplica:RLModel pid=1632778)[0m > Try #5: Total reward: 7.80110347487207
[2m[36m(ServeReplica:RLModel pid=1632778)[0m > Try #6: Total reward: 2.1195297414162857
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:56:09,679 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:56:11,819 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1633163)[0m 2023-05-19 12:56:12,999	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1633163)[0m 2023-05-19 12:56:13,000	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1633202)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1633

[2m[36m(ServeReplica:RLModel pid=1633163)[0m > Algorithm PPO with humanoid env for stand_param_19 task has been build.
[2m[36m(ServeReplica:RLModel pid=1633163)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1633163)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1633163)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1633163)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '240.0'.
[2m[36m(ServeReplica:RLModel pid=1633163)[0m > Try #1: Total reward: 11.000456419618226
[2m[36m(ServeReplica:RLModel pid=1633163)[0m > Try #2: Total reward: 1.3102294228167342
[2m[36m(ServeReplica:RLModel pid=1633163)[0m > Try #3: Total reward: 3.7805166649078155
[2m[36m(ServeReplica:RLModel pid=1633163)[0m > Try #4: Total reward: 8.732143302632547
[2m[36m(ServeReplica:RLModel pid=1633163)[0m > Try #5: Total reward: 1.6827241799876742
[2m[36m(ServeReplica:RLModel pid=1633163)[0m > Try #6: Total reward: 2.015629693885158
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:56:33,181 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:56:35,319 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1633547)[0m 2023-05-19 12:56:36,510	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1633547)[0m 2023-05-19 12:56:36,511	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1633587)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1633

[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Algorithm PPO with humanoid env for run_param_0 task has been build.
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1633547)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '12.0'.


[2m[36m(ServeReplica:RLModel pid=1633547)[0m 2023-05-19 12:56:40,264	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1633547)[0m 2023-05-19 12:56:40,264	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 492.5820164680481, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1633809)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1633809)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1633809)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1633810)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker p

[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Try #1: Total reward: 0.6682531922573615
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Try #2: Total reward: 1.6904893596731052
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Try #3: Total reward: 0.5758413227213371
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Try #4: Total reward: 0.12252184303095433
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Try #5: Total reward: 0.8377036039953489
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Try #6: Total reward: 0.25065704205599026
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Try #7: Total reward: 1.0404622962487162
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Try #8: Total reward: 0.17131219304590223
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Try #9: Total reward: 1.5086008087326406
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Try #10: Total reward: 1.221054380447795
[2m[36m(ServeReplica:RLModel pid=1633547)[0m > Try #11: Total reward: 0.810600039670

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:56:56,267 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:56:58,406 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1633932)[0m 2023-05-19 12:56:59,587	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1633932)[0m 2023-05-19 12:56:59,588	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1633972)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1633

[2m[36m(ServeReplica:RLModel pid=1633932)[0m > Algorithm PPO with humanoid env for run_param_0 task has been build.
[2m[36m(ServeReplica:RLModel pid=1633932)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1633932)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1633932)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1633932)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '12.0'.
[2m[36m(ServeReplica:RLModel pid=1633932)[0m > Try #1: Total reward: 1.1446899222198506
[2m[36m(ServeReplica:RLModel pid=1633932)[0m > Try #2: Total reward: 1.2728691543649189
[2m[36m(ServeReplica:RLModel pid=1633932)[0m > Try #3: Total reward: 0.3236477320162016
[2m[36m(ServeReplica:RLModel pid=1633932)[0m > Try #4: Total reward: 0.9080831970049207
[2m[36m(ServeReplica:RLModel pid=1633932)[0m > Try #5: Total reward: 0.12958653875139345
[2m[36m(ServeReplica:RLModel pid=1633932)[0m > Try #6: Total reward: 0.785043326534017
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:57:19,257 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:57:21,399 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1634317)[0m 2023-05-19 12:57:22,590	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1634317)[0m 2023-05-19 12:57:22,591	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1634358)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1634

[2m[36m(ServeReplica:RLModel pid=1634317)[0m > Algorithm PPO with humanoid env for run_param_0 task has been build.
[2m[36m(ServeReplica:RLModel pid=1634317)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1634317)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1634317)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1634317)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '12.0'.
[2m[36m(ServeReplica:RLModel pid=1634317)[0m > Try #1: Total reward: 0.356055958632664
[2m[36m(ServeReplica:RLModel pid=1634317)[0m > Try #2: Total reward: 0.7864884170426534
[2m[36m(ServeReplica:RLModel pid=1634317)[0m > Try #3: Total reward: 0.6980320430880075
[2m[36m(ServeReplica:RLModel pid=1634317)[0m > Try #4: Total reward: 1.3236266730143966
[2m[36m(ServeReplica:RLModel pid=1634317)[0m > Try #5: Total reward: 0.09940610973160854
[2m[36m(ServeReplica:RLModel pid=1634317)[0m > Try #6: Total reward: 1.3950005728275996
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:57:42,352 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:57:44,493 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1634700)[0m 2023-05-19 12:57:45,681	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1634700)[0m 2023-05-19 12:57:45,682	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1634736)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1634

[2m[36m(ServeReplica:RLModel pid=1634700)[0m > Algorithm PPO with humanoid env for run_param_0 task has been build.
[2m[36m(ServeReplica:RLModel pid=1634700)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1634700)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1634700)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1634700)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '12.0'.
[2m[36m(ServeReplica:RLModel pid=1634700)[0m > Try #1: Total reward: 1.2897814116500348
[2m[36m(ServeReplica:RLModel pid=1634700)[0m > Try #2: Total reward: 0.8075136497902184
[2m[36m(ServeReplica:RLModel pid=1634700)[0m > Try #3: Total reward: 0.10386809404382864
[2m[36m(ServeReplica:RLModel pid=1634700)[0m > Try #4: Total reward: 1.3208044361915123
[2m[36m(ServeReplica:RLModel pid=1634700)[0m > Try #5: Total reward: 0.16648939911182725
[2m[36m(ServeReplica:RLModel pid=1634700)[0m > Try #6: Total reward: 1.0966523677418323
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:58:05,379 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:58:07,522 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1635089)[0m 2023-05-19 12:58:08,714	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1635089)[0m 2023-05-19 12:58:08,715	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1635129)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1635

[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Algorithm PPO with humanoid env for run_param_0 task has been build.
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1635089)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '12.0'.


[2m[36m(ServeReplica:RLModel pid=1635089)[0m 2023-05-19 12:58:12,473	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1635089)[0m 2023-05-19 12:58:12,473	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 435.5413944721222, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1635352)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1635352)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1635352)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1635353)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Try #1: Total reward: 1.6124278702551227
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Try #2: Total reward: 0.7170383561847609
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Try #3: Total reward: 0.09140306496225714
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Try #4: Total reward: 0.8012044269562947
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Try #5: Total reward: 0.8189484319681336
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Try #6: Total reward: 0.7997123404833011
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Try #7: Total reward: 0.8601821672361113
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Try #8: Total reward: 1.5628683149049074
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Try #9: Total reward: 1.8151184985573403
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Try #10: Total reward: 1.0644628952968687
[2m[36m(ServeReplica:RLModel pid=1635089)[0m > Try #11: Total reward: 0.5416721919757

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:58:28,464 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:58:30,603 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1635476)[0m 2023-05-19 12:58:31,795	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1635476)[0m 2023-05-19 12:58:31,795	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1635516)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1635

[2m[36m(ServeReplica:RLModel pid=1635476)[0m > Algorithm PPO with humanoid env for run_param_1 task has been build.
[2m[36m(ServeReplica:RLModel pid=1635476)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1635476)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1635476)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1635476)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '24.0'.
[2m[36m(ServeReplica:RLModel pid=1635476)[0m > Try #1: Total reward: 1.7116153252321025
[2m[36m(ServeReplica:RLModel pid=1635476)[0m > Try #2: Total reward: 1.0386741592720565
[2m[36m(ServeReplica:RLModel pid=1635476)[0m > Try #3: Total reward: 0.6438934015663567
[2m[36m(ServeReplica:RLModel pid=1635476)[0m > Try #4: Total reward: 1.6395309809959402
[2m[36m(ServeReplica:RLModel pid=1635476)[0m > Try #5: Total reward: 1.7339056176797834
[2m[36m(ServeReplica:RLModel pid=1635476)[0m > Try #6: Total reward: 0.9183643091706947
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:58:51,553 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:58:53,695 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1635860)[0m 2023-05-19 12:58:54,894	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1635860)[0m 2023-05-19 12:58:54,895	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1635900)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1635

[2m[36m(ServeReplica:RLModel pid=1635860)[0m > Algorithm PPO with humanoid env for run_param_1 task has been build.
[2m[36m(ServeReplica:RLModel pid=1635860)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1635860)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1635860)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1635860)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '24.0'.
[2m[36m(ServeReplica:RLModel pid=1635860)[0m > Try #1: Total reward: 1.1825564482244266
[2m[36m(ServeReplica:RLModel pid=1635860)[0m > Try #2: Total reward: 1.850407560879676
[2m[36m(ServeReplica:RLModel pid=1635860)[0m > Try #3: Total reward: 1.6256824392832552
[2m[36m(ServeReplica:RLModel pid=1635860)[0m > Try #4: Total reward: 0.8979369562223865
[2m[36m(ServeReplica:RLModel pid=1635860)[0m > Try #5: Total reward: 0.26240019387795577
[2m[36m(ServeReplica:RLModel pid=1635860)[0m > Try #6: Total reward: 0.7641703967374691
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:59:14,617 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:59:16,758 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1636244)[0m 2023-05-19 12:59:17,943	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1636244)[0m 2023-05-19 12:59:17,944	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1636283)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1636

[2m[36m(ServeReplica:RLModel pid=1636244)[0m > Algorithm PPO with humanoid env for run_param_1 task has been build.
[2m[36m(ServeReplica:RLModel pid=1636244)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1636244)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1636244)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1636244)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '24.0'.
[2m[36m(ServeReplica:RLModel pid=1636244)[0m > Try #1: Total reward: 0.5826985574811756
[2m[36m(ServeReplica:RLModel pid=1636244)[0m > Try #2: Total reward: 1.3993811351115422
[2m[36m(ServeReplica:RLModel pid=1636244)[0m > Try #3: Total reward: 1.298867892686769
[2m[36m(ServeReplica:RLModel pid=1636244)[0m > Try #4: Total reward: 1.8783513266626972
[2m[36m(ServeReplica:RLModel pid=1636244)[0m > Try #5: Total reward: 0.6886006886676482
[2m[36m(ServeReplica:RLModel pid=1636244)[0m > Try #6: Total reward: 0.7574569490385745
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:59:37,701 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 12:59:39,844 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1636627)[0m 2023-05-19 12:59:41,035	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1636627)[0m 2023-05-19 12:59:41,036	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1636665)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1636

[2m[36m(ServeReplica:RLModel pid=1636627)[0m > Algorithm PPO with humanoid env for run_param_1 task has been build.
[2m[36m(ServeReplica:RLModel pid=1636627)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1636627)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1636627)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1636627)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '24.0'.
[2m[36m(ServeReplica:RLModel pid=1636627)[0m > Try #1: Total reward: 0.7248381287009161
[2m[36m(ServeReplica:RLModel pid=1636627)[0m > Try #2: Total reward: 0.10657122512053561
[2m[36m(ServeReplica:RLModel pid=1636627)[0m > Try #3: Total reward: 0.9052853538925602
[2m[36m(ServeReplica:RLModel pid=1636627)[0m > Try #4: Total reward: 1.5124693953810222
[2m[36m(ServeReplica:RLModel pid=1636627)[0m > Try #5: Total reward: 0.27908748900628666
[2m[36m(ServeReplica:RLModel pid=1636627)[0m > Try #6: Total reward: 1.0004235668243913
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:00:00,791 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:00:02,930 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1637013)[0m 2023-05-19 13:00:04,118	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1637013)[0m 2023-05-19 13:00:04,119	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1637054)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1637

[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Algorithm PPO with humanoid env for run_param_1 task has been build.
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1637013)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '24.0'.


[2m[36m(ServeReplica:RLModel pid=1637013)[0m 2023-05-19 13:00:07,904	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1637013)[0m 2023-05-19 13:00:07,904	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 435.5413944721222, '_episodes_total': 1000}
[2m[36m(ServeReplica:RLModel pid=1637013)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1637013)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1637013)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1637276)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2

[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Try #1: Total reward: 1.0510856055533253
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Try #2: Total reward: 0.5617406753137333
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Try #3: Total reward: 0.8399534140627044
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Try #4: Total reward: 0.49880799973221024
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Try #5: Total reward: 0.4454632557546338
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Try #6: Total reward: 0.7243338485037664
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Try #7: Total reward: 0.32598220925683064
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Try #8: Total reward: 0.5540711988210655
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Try #9: Total reward: 1.3888666199194224
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Try #10: Total reward: 1.6048996333993804
[2m[36m(ServeReplica:RLModel pid=1637013)[0m > Try #11: Total reward: 1.206570549087

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:00:23,873 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:00:26,016 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1637399)[0m 2023-05-19 13:00:27,210	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1637399)[0m 2023-05-19 13:00:27,211	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1637438)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1637

[2m[36m(ServeReplica:RLModel pid=1637399)[0m > Algorithm PPO with humanoid env for run_param_2 task has been build.
[2m[36m(ServeReplica:RLModel pid=1637399)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1637399)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1637399)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1637399)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '36.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1637399)[0m > Try #1: Total reward: 1.5184524519428926
[2m[36m(ServeReplica:RLModel pid=1637399)[0m > Try #2: Total reward: 0.9712797691671243
[2m[36m(ServeReplica:RLModel pid=1637399)[0m > Try #3: Total reward: 1.378227739602443
[2m[36m(ServeReplica:RLModel pid=1637399)[0m > Try #4: Total reward: 0.9549508842887575
[2m[36m(ServeReplica:RLModel pid=1637399)[0m > Try #5: Total reward: 0.3774640676725452
[2m[36m(ServeReplica:RLModel pid=1637399)[0m > Try #6: Total reward: 0.224362439008

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:00:46,846 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:00:48,984 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1637788)[0m 2023-05-19 13:00:50,170	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1637788)[0m 2023-05-19 13:00:50,170	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1637829)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1637

[2m[36m(ServeReplica:RLModel pid=1637788)[0m > Algorithm PPO with humanoid env for run_param_2 task has been build.
[2m[36m(ServeReplica:RLModel pid=1637788)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1637788)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1637788)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1637788)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '36.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1637788)[0m > Try #1: Total reward: 0.7379462836958044
[2m[36m(ServeReplica:RLModel pid=1637788)[0m > Try #2: Total reward: 0.4739088859632801
[2m[36m(ServeReplica:RLModel pid=1637788)[0m > Try #3: Total reward: 1.1848943660910172
[2m[36m(ServeReplica:RLModel pid=1637788)[0m > Try #4: Total reward: 0.023084736347427633
[2m[36m(ServeReplica:RLModel pid=1637788)[0m > Try #5: Total reward: 1.359980317940672
[2m[36m(ServeReplica:RLModel pid=1637788)[0m > Try #6: Total reward: 0.3197620756

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:01:09,921 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:01:12,064 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1638171)[0m 2023-05-19 13:01:13,255	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1638171)[0m 2023-05-19 13:01:13,255	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1638210)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1638

[2m[36m(ServeReplica:RLModel pid=1638171)[0m > Algorithm PPO with humanoid env for run_param_2 task has been build.
[2m[36m(ServeReplica:RLModel pid=1638171)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1638171)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1638171)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1638171)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '36.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1638171)[0m > Try #1: Total reward: 0.05802560600826408
[2m[36m(ServeReplica:RLModel pid=1638171)[0m > Try #2: Total reward: 1.4413007701399794
[2m[36m(ServeReplica:RLModel pid=1638171)[0m > Try #3: Total reward: 0.20785174588071875
[2m[36m(ServeReplica:RLModel pid=1638171)[0m > Try #4: Total reward: 0.9896681773489582
[2m[36m(ServeReplica:RLModel pid=1638171)[0m > Try #5: Total reward: 1.4366020736485792
[2m[36m(ServeReplica:RLModel pid=1638171)[0m > Try #6: Total reward: 1.478963974

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:01:33,004 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:01:35,142 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1638554)[0m 2023-05-19 13:01:36,337	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1638554)[0m 2023-05-19 13:01:36,338	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1638592)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1638

[2m[36m(ServeReplica:RLModel pid=1638554)[0m > Algorithm PPO with humanoid env for run_param_2 task has been build.
[2m[36m(ServeReplica:RLModel pid=1638554)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1638554)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1638554)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1638554)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '36.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1638554)[0m > Try #1: Total reward: 0.4963849706906537
[2m[36m(ServeReplica:RLModel pid=1638554)[0m > Try #2: Total reward: 1.5733691355692758
[2m[36m(ServeReplica:RLModel pid=1638554)[0m > Try #3: Total reward: 0.24372048469022753
[2m[36m(ServeReplica:RLModel pid=1638554)[0m > Try #4: Total reward: 0.8922239216953551
[2m[36m(ServeReplica:RLModel pid=1638554)[0m > Try #5: Total reward: 0.7050841802285003
[2m[36m(ServeReplica:RLModel pid=1638554)[0m > Try #6: Total reward: 1.5854717511

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:01:56,094 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:01:58,236 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1638941)[0m 2023-05-19 13:01:59,418	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1638941)[0m 2023-05-19 13:01:59,418	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1638981)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1638

[2m[36m(ServeReplica:RLModel pid=1638941)[0m > Algorithm PPO with humanoid env for run_param_2 task has been build.
[2m[36m(ServeReplica:RLModel pid=1638941)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1638941)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1638941)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1638941)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '36.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1638941)[0m > Try #1: Total reward: 0.3737869268997095
[2m[36m(ServeReplica:RLModel pid=1638941)[0m > Try #2: Total reward: 0.4206100844207954
[2m[36m(ServeReplica:RLModel pid=1638941)[0m > Try #3: Total reward: 1.2656001150201657
[2m[36m(ServeReplica:RLModel pid=1638941)[0m > Try #4: Total reward: 0.8279189336065125
[2m[36m(ServeReplica:RLModel pid=1638941)[0m > Try #5: Total reward: 1.6072815717152606
[2m[36m(ServeReplica:RLModel pid=1638941)[0m > Try #6: Total reward: 0.61638472061

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:02:19,178 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:02:21,320 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1639326)[0m 2023-05-19 13:02:22,502	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1639326)[0m 2023-05-19 13:02:22,502	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1639367)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1639

[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Algorithm PPO with humanoid env for run_param_3 task has been build.
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1639326)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '48.0'.


[2m[36m(ServeReplica:RLModel pid=1639326)[0m 2023-05-19 13:02:26,391	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1639326)[0m 2023-05-19 13:02:26,391	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 492.5820164680481, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1639589)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1639589)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1639589)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1639588)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker p

[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Try #1: Total reward: 0.7691312274017336
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Try #2: Total reward: 0.7103719026895674
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Try #3: Total reward: 0.9646872406831751
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Try #4: Total reward: 1.795652676020341
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Try #5: Total reward: 0.04709824727035668
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Try #6: Total reward: 1.7438170171797014
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Try #7: Total reward: 0.8321182576679642
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Try #8: Total reward: 0.700671651271171
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Try #9: Total reward: 1.4407643730000848
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Try #10: Total reward: 1.046665997799791
[2m[36m(ServeReplica:RLModel pid=1639326)[0m > Try #11: Total reward: 1.0267663150785276

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:02:42,273 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:02:44,415 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1639711)[0m 2023-05-19 13:02:45,610	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1639711)[0m 2023-05-19 13:02:45,611	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1639746)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1639

[2m[36m(ServeReplica:RLModel pid=1639711)[0m > Algorithm PPO with humanoid env for run_param_3 task has been build.
[2m[36m(ServeReplica:RLModel pid=1639711)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1639711)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1639711)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1639711)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '48.0'.
[2m[36m(ServeReplica:RLModel pid=1639711)[0m > Try #1: Total reward: 1.2750619431675247
[2m[36m(ServeReplica:RLModel pid=1639711)[0m > Try #2: Total reward: 1.1398088198656249
[2m[36m(ServeReplica:RLModel pid=1639711)[0m > Try #3: Total reward: 1.772622332062325
[2m[36m(ServeReplica:RLModel pid=1639711)[0m > Try #4: Total reward: 0.3202148380506212
[2m[36m(ServeReplica:RLModel pid=1639711)[0m > Try #5: Total reward: 1.0136545785658329
[2m[36m(ServeReplica:RLModel pid=1639711)[0m > Try #6: Total reward: 0.3742445951007933
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:03:05,273 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:03:07,415 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1640097)[0m 2023-05-19 13:03:08,615	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1640097)[0m 2023-05-19 13:03:08,615	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1640138)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1640

[2m[36m(ServeReplica:RLModel pid=1640097)[0m > Algorithm PPO with humanoid env for run_param_3 task has been build.
[2m[36m(ServeReplica:RLModel pid=1640097)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1640097)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1640097)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1640097)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '48.0'.
[2m[36m(ServeReplica:RLModel pid=1640097)[0m > Try #1: Total reward: 1.9266417091464463
[2m[36m(ServeReplica:RLModel pid=1640097)[0m > Try #2: Total reward: 0.8822869097319166
[2m[36m(ServeReplica:RLModel pid=1640097)[0m > Try #3: Total reward: 0.8820152072063376
[2m[36m(ServeReplica:RLModel pid=1640097)[0m > Try #4: Total reward: 0.33223607896774887
[2m[36m(ServeReplica:RLModel pid=1640097)[0m > Try #5: Total reward: 0.4911903198057905
[2m[36m(ServeReplica:RLModel pid=1640097)[0m > Try #6: Total reward: 0.8713307690196589
[2m[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:03:28,356 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:03:30,497 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1640481)[0m 2023-05-19 13:03:31,695	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1640481)[0m 2023-05-19 13:03:31,695	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1640523)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1640

[2m[36m(ServeReplica:RLModel pid=1640481)[0m > Algorithm PPO with humanoid env for run_param_3 task has been build.
[2m[36m(ServeReplica:RLModel pid=1640481)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1640481)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1640481)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1640481)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '48.0'.
[2m[36m(ServeReplica:RLModel pid=1640481)[0m > Try #1: Total reward: 0.29827764014786035
[2m[36m(ServeReplica:RLModel pid=1640481)[0m > Try #2: Total reward: 0.7728306938647277
[2m[36m(ServeReplica:RLModel pid=1640481)[0m > Try #3: Total reward: 0.6879247516694629
[2m[36m(ServeReplica:RLModel pid=1640481)[0m > Try #4: Total reward: 0.09308155733633265
[2m[36m(ServeReplica:RLModel pid=1640481)[0m > Try #5: Total reward: 0.37397405386535165
[2m[36m(ServeReplica:RLModel pid=1640481)[0m > Try #6: Total reward: 0.8782020910957659
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:03:51,447 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:03:53,586 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1640867)[0m 2023-05-19 13:03:54,772	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1640867)[0m 2023-05-19 13:03:54,772	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1640908)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1640

[2m[36m(ServeReplica:RLModel pid=1640867)[0m > Algorithm PPO with humanoid env for run_param_3 task has been build.
[2m[36m(ServeReplica:RLModel pid=1640867)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1640867)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1640867)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1640867)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '48.0'.
[2m[36m(ServeReplica:RLModel pid=1640867)[0m > Try #1: Total reward: 0.6941926711550868
[2m[36m(ServeReplica:RLModel pid=1640867)[0m > Try #2: Total reward: 0.5157501187501012
[2m[36m(ServeReplica:RLModel pid=1640867)[0m > Try #3: Total reward: 0.6041910303738176
[2m[36m(ServeReplica:RLModel pid=1640867)[0m > Try #4: Total reward: 0.8068158280245341
[2m[36m(ServeReplica:RLModel pid=1640867)[0m > Try #5: Total reward: 1.0160573001854827
[2m[36m(ServeReplica:RLModel pid=1640867)[0m > Try #6: Total reward: 0.23819104207989775
[2m[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:04:14,534 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:04:16,674 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1641253)[0m 2023-05-19 13:04:17,870	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1641253)[0m 2023-05-19 13:04:17,870	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1641293)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1641

[2m[36m(ServeReplica:RLModel pid=1641253)[0m > Algorithm PPO with humanoid env for run_param_4 task has been build.
[2m[36m(ServeReplica:RLModel pid=1641253)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1641253)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1641253)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1641253)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '60.0'.
[2m[36m(ServeReplica:RLModel pid=1641253)[0m > Try #1: Total reward: 1.5616011406617587
[2m[36m(ServeReplica:RLModel pid=1641253)[0m > Try #2: Total reward: 0.39049886555588753
[2m[36m(ServeReplica:RLModel pid=1641253)[0m > Try #3: Total reward: 0.489834745177792
[2m[36m(ServeReplica:RLModel pid=1641253)[0m > Try #4: Total reward: 0.9852719130886108
[2m[36m(ServeReplica:RLModel pid=1641253)[0m > Try #5: Total reward: 0.5083274177578058
[2m[36m(ServeReplica:RLModel pid=1641253)[0m > Try #6: Total reward: 0.33375646945708215
[2m[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:04:37,536 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:04:39,678 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1641639)[0m 2023-05-19 13:04:40,871	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1641639)[0m 2023-05-19 13:04:40,871	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1641674)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1641

[2m[36m(ServeReplica:RLModel pid=1641639)[0m > Algorithm PPO with humanoid env for run_param_4 task has been build.
[2m[36m(ServeReplica:RLModel pid=1641639)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1641639)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1641639)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1641639)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '60.0'.
[2m[36m(ServeReplica:RLModel pid=1641639)[0m > Try #1: Total reward: 0.6725651624374126
[2m[36m(ServeReplica:RLModel pid=1641639)[0m > Try #2: Total reward: 0.8418811637758332
[2m[36m(ServeReplica:RLModel pid=1641639)[0m > Try #3: Total reward: 2.514258804310967
[2m[36m(ServeReplica:RLModel pid=1641639)[0m > Try #4: Total reward: 0.6714794494569539
[2m[36m(ServeReplica:RLModel pid=1641639)[0m > Try #5: Total reward: 0.015719431062911793
[2m[36m(ServeReplica:RLModel pid=1641639)[0m > Try #6: Total reward: 1.3028038967433133
[2m[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:05:00,638 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:05:02,781 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1642021)[0m 2023-05-19 13:05:03,973	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1642021)[0m 2023-05-19 13:05:03,973	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1642058)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1642

[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Algorithm PPO with humanoid env for run_param_4 task has been build.
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1642021)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '60.0'.


[2m[36m(ServeReplica:RLModel pid=1642021)[0m 2023-05-19 13:05:07,773	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_10/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1642021)[0m 2023-05-19 13:05:07,773	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 491.70674085617065, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1642283)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1642283)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1642283)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1642284)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Try #1: Total reward: 0.6659647308438261
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Try #2: Total reward: 0.39203437886978665
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Try #3: Total reward: 1.0507797566066532
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Try #4: Total reward: 0.38029328999740486
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Try #5: Total reward: 1.2454519089091087
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Try #6: Total reward: 1.484955778554644
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Try #7: Total reward: 0.27012436749255214
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Try #8: Total reward: 1.1597799968731686
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Try #9: Total reward: 1.037599866467812
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Try #10: Total reward: 1.6533615155492023
[2m[36m(ServeReplica:RLModel pid=1642021)[0m > Try #11: Total reward: 1.8186340273208

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:05:23,725 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:05:25,868 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1642405)[0m 2023-05-19 13:05:27,061	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1642405)[0m 2023-05-19 13:05:27,061	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1642440)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1642

[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Algorithm PPO with humanoid env for run_param_4 task has been build.
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1642405)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '60.0'.


[2m[36m(ServeReplica:RLModel pid=1642405)[0m 2023-05-19 13:05:30,887	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_15/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1642405)[0m 2023-05-19 13:05:30,888	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 483.58794140815735, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1642667)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1642667)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1642667)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1642666)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Try #1: Total reward: 1.0363466456313297
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Try #2: Total reward: 0.9155628748921606
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Try #3: Total reward: 0.4012168099680903
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Try #4: Total reward: 1.1684441378982315
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Try #5: Total reward: 1.5850206362336037
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Try #6: Total reward: 1.2079607286495146
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Try #7: Total reward: 0.3624643525342473
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Try #8: Total reward: 1.0808748068438077
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Try #9: Total reward: 1.8170318031897432
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Try #10: Total reward: 0.9733444151640882
[2m[36m(ServeReplica:RLModel pid=1642405)[0m > Try #11: Total reward: 1.18050142173354

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:05:46,725 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:05:48,867 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1642788)[0m 2023-05-19 13:05:50,045	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1642788)[0m 2023-05-19 13:05:50,045	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1642824)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1642

[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Algorithm PPO with humanoid env for run_param_4 task has been build.
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1642788)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '60.0'.


[2m[36m(ServeReplica:RLModel pid=1642788)[0m 2023-05-19 13:05:53,796	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1642788)[0m 2023-05-19 13:05:53,796	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 435.5413944721222, '_episodes_total': 1000}
[2m[36m(ServeReplica:RLModel pid=1642788)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1642788)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1642788)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1643050)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2

[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Try #1: Total reward: 1.1578876070218123
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Try #2: Total reward: 0.4275800370545287
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Try #3: Total reward: 1.7518747605535678
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Try #4: Total reward: 2.0268339079034523
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Try #5: Total reward: 1.0928255728606777
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Try #6: Total reward: 0.6171849603191027
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Try #7: Total reward: 1.3329421157136718
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Try #8: Total reward: 0.06167382362530314
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Try #9: Total reward: 0.6922426154558252
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Try #10: Total reward: 0.9757336250027445
[2m[36m(ServeReplica:RLModel pid=1642788)[0m > Try #11: Total reward: 0.6802351620510

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:06:09,813 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:06:11,953 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1643172)[0m 2023-05-19 13:06:13,140	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1643172)[0m 2023-05-19 13:06:13,140	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1643214)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1643

[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Algorithm PPO with humanoid env for run_param_5 task has been build.
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1643172)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '72.0'.


[2m[36m(ServeReplica:RLModel pid=1643172)[0m 2023-05-19 13:06:17,010	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1643172)[0m 2023-05-19 13:06:17,010	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 492.5820164680481, '_episodes_total': 1000}
[2m[36m(ServeReplica:RLModel pid=1643172)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1643172)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1643172)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1643433)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m

[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Try #1: Total reward: 0.10553043456506027
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Try #2: Total reward: 1.0285803644542786
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Try #3: Total reward: 0.980611834023379
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Try #4: Total reward: 1.9728992324020624
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Try #5: Total reward: 1.9328889401090472
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Try #6: Total reward: 0.7109470273838314
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Try #7: Total reward: 0.36117500579215855
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Try #8: Total reward: 1.7563570615567798
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Try #9: Total reward: 1.2723380185275612
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Try #10: Total reward: 1.2098987547878384
[2m[36m(ServeReplica:RLModel pid=1643172)[0m > Try #11: Total reward: 0.0998204688208

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:06:32,913 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:06:35,057 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1643556)[0m 2023-05-19 13:06:36,235	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1643556)[0m 2023-05-19 13:06:36,235	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1643597)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1643

[2m[36m(ServeReplica:RLModel pid=1643556)[0m > Algorithm PPO with humanoid env for run_param_5 task has been build.
[2m[36m(ServeReplica:RLModel pid=1643556)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1643556)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1643556)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1643556)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '72.0'.
[2m[36m(ServeReplica:RLModel pid=1643556)[0m > Try #1: Total reward: 2.005611236473282
[2m[36m(ServeReplica:RLModel pid=1643556)[0m > Try #2: Total reward: 1.507551743112592
[2m[36m(ServeReplica:RLModel pid=1643556)[0m > Try #3: Total reward: 1.0218574003708287
[2m[36m(ServeReplica:RLModel pid=1643556)[0m > Try #4: Total reward: 1.9490170369552895
[2m[36m(ServeReplica:RLModel pid=1643556)[0m > Try #5: Total reward: 0.963328668800694
[2m[36m(ServeReplica:RLModel pid=1643556)[0m > Try #6: Total reward: 1.1577286600486525
[2m[36m(

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:06:56,000 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:06:58,142 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1643940)[0m 2023-05-19 13:06:59,335	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1643940)[0m 2023-05-19 13:06:59,335	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1643974)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1643

[2m[36m(ServeReplica:RLModel pid=1643940)[0m > Algorithm PPO with humanoid env for run_param_5 task has been build.
[2m[36m(ServeReplica:RLModel pid=1643940)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1643940)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1643940)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1643940)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '72.0'.
[2m[36m(ServeReplica:RLModel pid=1643940)[0m > Try #1: Total reward: 0.12870432435401538
[2m[36m(ServeReplica:RLModel pid=1643940)[0m > Try #2: Total reward: 1.0018163473368422
[2m[36m(ServeReplica:RLModel pid=1643940)[0m > Try #3: Total reward: 1.5522045815379726
[2m[36m(ServeReplica:RLModel pid=1643940)[0m > Try #4: Total reward: 1.505184177333898
[2m[36m(ServeReplica:RLModel pid=1643940)[0m > Try #5: Total reward: 0.5753398877907153
[2m[36m(ServeReplica:RLModel pid=1643940)[0m > Try #6: Total reward: 1.8492099023648059
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:07:18,997 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:07:21,136 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1644324)[0m 2023-05-19 13:07:22,324	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1644324)[0m 2023-05-19 13:07:22,325	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1644365)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1644

[2m[36m(ServeReplica:RLModel pid=1644324)[0m > Algorithm PPO with humanoid env for run_param_5 task has been build.
[2m[36m(ServeReplica:RLModel pid=1644324)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1644324)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1644324)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1644324)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '72.0'.
[2m[36m(ServeReplica:RLModel pid=1644324)[0m > Try #1: Total reward: 1.1288542840584914
[2m[36m(ServeReplica:RLModel pid=1644324)[0m > Try #2: Total reward: 1.756075576014202
[2m[36m(ServeReplica:RLModel pid=1644324)[0m > Try #3: Total reward: 1.211791871466624
[2m[36m(ServeReplica:RLModel pid=1644324)[0m > Try #4: Total reward: 1.1454411724260973
[2m[36m(ServeReplica:RLModel pid=1644324)[0m > Try #5: Total reward: 0.24328848159701047
[2m[36m(ServeReplica:RLModel pid=1644324)[0m > Try #6: Total reward: 0.39647716771233765
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:07:42,082 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:07:44,220 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1644710)[0m 2023-05-19 13:07:45,416	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1644710)[0m 2023-05-19 13:07:45,416	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1644747)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1644

[2m[36m(ServeReplica:RLModel pid=1644710)[0m > Algorithm PPO with humanoid env for run_param_5 task has been build.
[2m[36m(ServeReplica:RLModel pid=1644710)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1644710)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1644710)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1644710)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '72.0'.
[2m[36m(ServeReplica:RLModel pid=1644710)[0m > Try #1: Total reward: 0.32871144905658845
[2m[36m(ServeReplica:RLModel pid=1644710)[0m > Try #2: Total reward: 0.6543659985348242
[2m[36m(ServeReplica:RLModel pid=1644710)[0m > Try #3: Total reward: 0.5261104461592212
[2m[36m(ServeReplica:RLModel pid=1644710)[0m > Try #4: Total reward: 0.4364701485793893
[2m[36m(ServeReplica:RLModel pid=1644710)[0m > Try #5: Total reward: 1.565550759587257
[2m[36m(ServeReplica:RLModel pid=1644710)[0m > Try #6: Total reward: 1.1848952128888208
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:08:05,216 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:08:07,356 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1645099)[0m 2023-05-19 13:08:08,546	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1645099)[0m 2023-05-19 13:08:08,546	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1645135)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1645

[2m[36m(ServeReplica:RLModel pid=1645099)[0m > Algorithm PPO with humanoid env for run_param_6 task has been build.
[2m[36m(ServeReplica:RLModel pid=1645099)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1645099)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1645099)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1645099)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '84.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1645099)[0m > Try #1: Total reward: 1.0931922254324313
[2m[36m(ServeReplica:RLModel pid=1645099)[0m > Try #2: Total reward: 0.5314368722602095
[2m[36m(ServeReplica:RLModel pid=1645099)[0m > Try #3: Total reward: 0.43887707458059483
[2m[36m(ServeReplica:RLModel pid=1645099)[0m > Try #4: Total reward: 1.0947776792124968
[2m[36m(ServeReplica:RLModel pid=1645099)[0m > Try #5: Total reward: 0.6954797864691569
[2m[36m(ServeReplica:RLModel pid=1645099)[0m > Try #6: Total reward: 1.6686992092

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:08:28,199 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:08:30,342 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1645488)[0m 2023-05-19 13:08:31,538	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1645488)[0m 2023-05-19 13:08:31,538	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1645526)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1645

[2m[36m(ServeReplica:RLModel pid=1645488)[0m > Algorithm PPO with humanoid env for run_param_6 task has been build.
[2m[36m(ServeReplica:RLModel pid=1645488)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1645488)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1645488)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1645488)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '84.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1645488)[0m > Try #1: Total reward: 1.4980907471328417
[2m[36m(ServeReplica:RLModel pid=1645488)[0m > Try #2: Total reward: 0.6038543527810211
[2m[36m(ServeReplica:RLModel pid=1645488)[0m > Try #3: Total reward: 0.24546802166992845
[2m[36m(ServeReplica:RLModel pid=1645488)[0m > Try #4: Total reward: 0.3060644103507773
[2m[36m(ServeReplica:RLModel pid=1645488)[0m > Try #5: Total reward: 1.1410949918178848
[2m[36m(ServeReplica:RLModel pid=1645488)[0m > Try #6: Total reward: 0.9577597610

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:08:51,297 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:08:53,439 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1645872)[0m 2023-05-19 13:08:54,642	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1645872)[0m 2023-05-19 13:08:54,643	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1645913)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1645

[2m[36m(ServeReplica:RLModel pid=1645872)[0m > Algorithm PPO with humanoid env for run_param_6 task has been build.
[2m[36m(ServeReplica:RLModel pid=1645872)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1645872)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1645872)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1645872)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '84.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1645872)[0m > Try #1: Total reward: 1.3003401504861638
[2m[36m(ServeReplica:RLModel pid=1645872)[0m > Try #2: Total reward: 1.4839079417946708
[2m[36m(ServeReplica:RLModel pid=1645872)[0m > Try #3: Total reward: 0.16247216407651024
[2m[36m(ServeReplica:RLModel pid=1645872)[0m > Try #4: Total reward: 0.8242210921244653
[2m[36m(ServeReplica:RLModel pid=1645872)[0m > Try #5: Total reward: 0.2113949863591219
[2m[36m(ServeReplica:RLModel pid=1645872)[0m > Try #6: Total reward: 0.6830468263

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:09:14,385 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:09:16,529 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1646258)[0m 2023-05-19 13:09:17,718	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1646258)[0m 2023-05-19 13:09:17,719	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1646300)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1646

[2m[36m(ServeReplica:RLModel pid=1646258)[0m > Algorithm PPO with humanoid env for run_param_6 task has been build.
[2m[36m(ServeReplica:RLModel pid=1646258)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1646258)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1646258)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1646258)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '84.00000000000001'.
[2m[36m(ServeReplica:RLModel pid=1646258)[0m > Try #1: Total reward: 0.46782977710001106
[2m[36m(ServeReplica:RLModel pid=1646258)[0m > Try #2: Total reward: 1.8951356677975688
[2m[36m(ServeReplica:RLModel pid=1646258)[0m > Try #3: Total reward: 1.2710918251631407
[2m[36m(ServeReplica:RLModel pid=1646258)[0m > Try #4: Total reward: 0.4655954009090911
[2m[36m(ServeReplica:RLModel pid=1646258)[0m > Try #5: Total reward: 0.7623062631389453
[2m[36m(ServeReplica:RLModel pid=1646258)[0m > Try #6: Total reward: 0.9683371741

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:09:37,483 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:09:39,624 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1646641)[0m 2023-05-19 13:09:40,813	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1646641)[0m 2023-05-19 13:09:40,814	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1646682)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1646

[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Algorithm PPO with humanoid env for run_param_6 task has been build.
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1646641)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '84.00000000000001'.


[2m[36m(ServeReplica:RLModel pid=1646641)[0m 2023-05-19 13:09:44,575	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1646641)[0m 2023-05-19 13:09:44,575	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 435.5413944721222, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1646906)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1646906)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1646906)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1646907)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Try #1: Total reward: 0.8375099184279448
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Try #2: Total reward: 0.14985460228038136
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Try #3: Total reward: 0.057183398820301934
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Try #4: Total reward: 0.7329906163923623
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Try #5: Total reward: 0.301179088560552
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Try #6: Total reward: 1.681658072469604
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Try #7: Total reward: 0.3290696391242501
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Try #8: Total reward: 0.5094115972362032
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Try #9: Total reward: 2.087943144175863
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Try #10: Total reward: 1.7696723123297664
[2m[36m(ServeReplica:RLModel pid=1646641)[0m > Try #11: Total reward: 1.91825399825270

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:10:00,477 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:10:02,617 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1647031)[0m 2023-05-19 13:10:03,806	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1647031)[0m 2023-05-19 13:10:03,806	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1647068)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1647

[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Algorithm PPO with humanoid env for run_param_7 task has been build.
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1647031)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '96.0'.


[2m[36m(ServeReplica:RLModel pid=1647031)[0m 2023-05-19 13:10:07,635	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1647031)[0m 2023-05-19 13:10:07,635	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 492.5820164680481, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1647294)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1647294)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1647294)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1647293)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker p

[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Try #1: Total reward: 0.5271481616769028
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Try #2: Total reward: 0.3478171655692626
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Try #3: Total reward: 0.23735278600664575
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Try #4: Total reward: 1.0281222165520116
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Try #5: Total reward: 0.26964054116358394
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Try #6: Total reward: 0.9082426057911286
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Try #7: Total reward: 1.038158381299849
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Try #8: Total reward: 0.21941713049153483
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Try #9: Total reward: 1.0074178752111493
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Try #10: Total reward: 1.1780110277897615
[2m[36m(ServeReplica:RLModel pid=1647031)[0m > Try #11: Total reward: 1.613481406594

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:10:23,578 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:10:25,721 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1647416)[0m 2023-05-19 13:10:26,910	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1647416)[0m 2023-05-19 13:10:26,910	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1647451)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1647

[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Algorithm PPO with humanoid env for run_param_7 task has been build.
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1647416)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '96.0'.


[2m[36m(ServeReplica:RLModel pid=1647416)[0m 2023-05-19 13:10:30,726	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1647416)[0m 2023-05-19 13:10:30,726	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 495.82029914855957, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1647679)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1647679)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1647679)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1647680)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Try #1: Total reward: 1.0407185382229154
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Try #2: Total reward: 1.9906885608840672
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Try #3: Total reward: 1.5034528807663472
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Try #4: Total reward: 0.8305529017713797
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Try #5: Total reward: 0.22566128597085175
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Try #6: Total reward: 0.41628587182828325
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Try #7: Total reward: 0.7371956436521545
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Try #8: Total reward: 0.29086224086028456
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Try #9: Total reward: 0.5821262758589302
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Try #10: Total reward: 0.4519974458886405
[2m[36m(ServeReplica:RLModel pid=1647416)[0m > Try #11: Total reward: 1.47336471315

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:10:46,678 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:10:48,818 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1647802)[0m 2023-05-19 13:10:49,996	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1647802)[0m 2023-05-19 13:10:49,996	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1647845)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1647

[2m[36m(ServeReplica:RLModel pid=1647802)[0m > Algorithm PPO with humanoid env for run_param_7 task has been build.
[2m[36m(ServeReplica:RLModel pid=1647802)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1647802)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1647802)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1647802)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '96.0'.
[2m[36m(ServeReplica:RLModel pid=1647802)[0m > Try #1: Total reward: 1.4058926835901915
[2m[36m(ServeReplica:RLModel pid=1647802)[0m > Try #2: Total reward: 0.5400962824791636
[2m[36m(ServeReplica:RLModel pid=1647802)[0m > Try #3: Total reward: 1.075353134813784
[2m[36m(ServeReplica:RLModel pid=1647802)[0m > Try #4: Total reward: 0.13637072518834323
[2m[36m(ServeReplica:RLModel pid=1647802)[0m > Try #5: Total reward: 0.03160675013581201
[2m[36m(ServeReplica:RLModel pid=1647802)[0m > Try #6: Total reward: 0.8060442779020351
[2m[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:11:09,674 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:11:11,814 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1648187)[0m 2023-05-19 13:11:13,000	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1648187)[0m 2023-05-19 13:11:13,001	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1648227)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1648

[2m[36m(ServeReplica:RLModel pid=1648187)[0m > Algorithm PPO with humanoid env for run_param_7 task has been build.
[2m[36m(ServeReplica:RLModel pid=1648187)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1648187)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1648187)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1648187)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '96.0'.
[2m[36m(ServeReplica:RLModel pid=1648187)[0m > Try #1: Total reward: 1.1042816658738386
[2m[36m(ServeReplica:RLModel pid=1648187)[0m > Try #2: Total reward: 1.446362471810958
[2m[36m(ServeReplica:RLModel pid=1648187)[0m > Try #3: Total reward: 1.3333798116868634
[2m[36m(ServeReplica:RLModel pid=1648187)[0m > Try #4: Total reward: 0.6912059612410715
[2m[36m(ServeReplica:RLModel pid=1648187)[0m > Try #5: Total reward: 0.9203943156285029
[2m[36m(ServeReplica:RLModel pid=1648187)[0m > Try #6: Total reward: 1.3596254704892343
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:11:32,768 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:11:34,910 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1648572)[0m 2023-05-19 13:11:36,100	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1648572)[0m 2023-05-19 13:11:36,100	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1648613)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1648

[2m[36m(ServeReplica:RLModel pid=1648572)[0m > Algorithm PPO with humanoid env for run_param_7 task has been build.
[2m[36m(ServeReplica:RLModel pid=1648572)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1648572)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1648572)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1648572)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '96.0'.
[2m[36m(ServeReplica:RLModel pid=1648572)[0m > Try #1: Total reward: 1.1238390337461615
[2m[36m(ServeReplica:RLModel pid=1648572)[0m > Try #2: Total reward: 0.2614689595484567
[2m[36m(ServeReplica:RLModel pid=1648572)[0m > Try #3: Total reward: 0.04367072168601598
[2m[36m(ServeReplica:RLModel pid=1648572)[0m > Try #4: Total reward: 1.4099679848378996
[2m[36m(ServeReplica:RLModel pid=1648572)[0m > Try #5: Total reward: 0.6121174248112387
[2m[36m(ServeReplica:RLModel pid=1648572)[0m > Try #6: Total reward: 0.8207697468987756
[2m[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:11:55,856 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:11:57,994 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1648955)[0m 2023-05-19 13:11:59,177	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1648955)[0m 2023-05-19 13:11:59,178	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1648996)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1648

[2m[36m(ServeReplica:RLModel pid=1648955)[0m > Algorithm PPO with humanoid env for run_param_8 task has been build.
[2m[36m(ServeReplica:RLModel pid=1648955)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1648955)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1648955)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1648955)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '108.0'.
[2m[36m(ServeReplica:RLModel pid=1648955)[0m > Try #1: Total reward: 2.2999003396119515
[2m[36m(ServeReplica:RLModel pid=1648955)[0m > Try #2: Total reward: 1.8381570582943998
[2m[36m(ServeReplica:RLModel pid=1648955)[0m > Try #3: Total reward: 0.8039147350949443
[2m[36m(ServeReplica:RLModel pid=1648955)[0m > Try #4: Total reward: 0.8689104557490104
[2m[36m(ServeReplica:RLModel pid=1648955)[0m > Try #5: Total reward: 0.39494224183340004
[2m[36m(ServeReplica:RLModel pid=1648955)[0m > Try #6: Total reward: 2.0345525275564302
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:12:18,927 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:12:21,065 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1649339)[0m 2023-05-19 13:12:22,258	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1649339)[0m 2023-05-19 13:12:22,259	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1649380)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1649

[2m[36m(ServeReplica:RLModel pid=1649339)[0m > Algorithm PPO with humanoid env for run_param_8 task has been build.
[2m[36m(ServeReplica:RLModel pid=1649339)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1649339)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1649339)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1649339)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '108.0'.
[2m[36m(ServeReplica:RLModel pid=1649339)[0m > Try #1: Total reward: 0.6178377139086129
[2m[36m(ServeReplica:RLModel pid=1649339)[0m > Try #2: Total reward: 1.431914230750615
[2m[36m(ServeReplica:RLModel pid=1649339)[0m > Try #3: Total reward: 0.9591867613070273
[2m[36m(ServeReplica:RLModel pid=1649339)[0m > Try #4: Total reward: 0.79110086542487
[2m[36m(ServeReplica:RLModel pid=1649339)[0m > Try #5: Total reward: 1.6284012474477545
[2m[36m(ServeReplica:RLModel pid=1649339)[0m > Try #6: Total reward: 0.31622330976060525
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:12:42,008 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:12:44,149 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1649724)[0m 2023-05-19 13:12:45,339	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1649724)[0m 2023-05-19 13:12:45,339	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1649761)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1649

[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Algorithm PPO with humanoid env for run_param_8 task has been build.
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1649724)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '108.0'.


[2m[36m(ServeReplica:RLModel pid=1649724)[0m 2023-05-19 13:12:49,182	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_10/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1649724)[0m 2023-05-19 13:12:49,182	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 491.70674085617065, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1649986)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1649986)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1649986)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1649987)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Try #1: Total reward: 0.8432455723502083
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Try #2: Total reward: 0.7603264751301738
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Try #3: Total reward: 1.0456920656405684
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Try #4: Total reward: 0.6183918627456062
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Try #5: Total reward: 0.263422764931385
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Try #6: Total reward: 0.7651531835206372
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Try #7: Total reward: 1.2248592264272868
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Try #8: Total reward: 0.7939218766824172
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Try #9: Total reward: 0.8483832773614138
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Try #10: Total reward: 1.7160719496422667
[2m[36m(ServeReplica:RLModel pid=1649724)[0m > Try #11: Total reward: 1.146825051795243

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:13:05,083 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:13:07,226 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1650110)[0m 2023-05-19 13:13:08,419	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1650110)[0m 2023-05-19 13:13:08,419	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1650145)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1650

[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Algorithm PPO with humanoid env for run_param_8 task has been build.
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1650110)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '108.0'.


[2m[36m(ServeReplica:RLModel pid=1650110)[0m 2023-05-19 13:13:12,354	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_15/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1650110)[0m 2023-05-19 13:13:12,354	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 483.58794140815735, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1650374)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1650374)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1650374)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1650373)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Try #1: Total reward: 0.9316677262861134
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Try #2: Total reward: 0.39545630642314333
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Try #3: Total reward: 1.3437474093595854
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Try #4: Total reward: 0.36072807871154816
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Try #5: Total reward: 0.19299286617845823
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Try #6: Total reward: 0.42044666791026875
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Try #7: Total reward: 0.9065011186294509
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Try #8: Total reward: 0.6626429268162353
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Try #9: Total reward: 0.5746428977442017
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Try #10: Total reward: 0.26044103713291855
[2m[36m(ServeReplica:RLModel pid=1650110)[0m > Try #11: Total reward: 0.465879443

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:13:28,077 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:13:30,215 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1650496)[0m 2023-05-19 13:13:31,400	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1650496)[0m 2023-05-19 13:13:31,401	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1650533)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1650

[2m[36m(ServeReplica:RLModel pid=1650496)[0m > Algorithm PPO with humanoid env for run_param_8 task has been build.
[2m[36m(ServeReplica:RLModel pid=1650496)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1650496)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1650496)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1650496)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '108.0'.
[2m[36m(ServeReplica:RLModel pid=1650496)[0m > Try #1: Total reward: 1.020472789531638
[2m[36m(ServeReplica:RLModel pid=1650496)[0m > Try #2: Total reward: 1.311024798218695
[2m[36m(ServeReplica:RLModel pid=1650496)[0m > Try #3: Total reward: 1.6334741998933662
[2m[36m(ServeReplica:RLModel pid=1650496)[0m > Try #4: Total reward: 0.49489631048384014
[2m[36m(ServeReplica:RLModel pid=1650496)[0m > Try #5: Total reward: 0.9375364652164913
[2m[36m(ServeReplica:RLModel pid=1650496)[0m > Try #6: Total reward: 1.2515834820548335
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:13:51,165 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:13:53,306 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1650879)[0m 2023-05-19 13:13:54,510	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1650879)[0m 2023-05-19 13:13:54,510	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1650918)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1650

[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Algorithm PPO with humanoid env for run_param_9 task has been build.
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1650879)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '120.0'.


[2m[36m(ServeReplica:RLModel pid=1650879)[0m 2023-05-19 13:13:58,333	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1650879)[0m 2023-05-19 13:13:58,333	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 492.5820164680481, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1651142)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1651142)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1651142)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1651141)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker p

[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Try #1: Total reward: 1.5566776734059757
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Try #2: Total reward: 1.621966578298286
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Try #3: Total reward: 1.4637375973095417
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Try #4: Total reward: 0.8405621087900461
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Try #5: Total reward: 1.4853840259219258
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Try #6: Total reward: 0.9609453599482932
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Try #7: Total reward: 1.4936713978919864
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Try #8: Total reward: 1.4324171470159246
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Try #9: Total reward: 0.3071729616359014
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Try #10: Total reward: 1.1278093807980913
[2m[36m(ServeReplica:RLModel pid=1650879)[0m > Try #11: Total reward: 0.810810969411269

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:14:14,233 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:14:16,374 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1651263)[0m 2023-05-19 13:14:17,576	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1651263)[0m 2023-05-19 13:14:17,576	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1651298)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1651

[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Algorithm PPO with humanoid env for run_param_9 task has been build.
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1651263)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '120.0'.


[2m[36m(ServeReplica:RLModel pid=1651263)[0m 2023-05-19 13:14:21,364	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1651263)[0m 2023-05-19 13:14:21,364	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 495.82029914855957, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1651525)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1651525)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1651525)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1651526)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Try #1: Total reward: 0.39847479400757474
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Try #2: Total reward: 0.8621094036587218
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Try #3: Total reward: 0.769651650821281
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Try #4: Total reward: 0.5113670452080917
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Try #5: Total reward: 1.0482259470343906
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Try #6: Total reward: 0.19934126398630514
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Try #7: Total reward: 0.6766081090115368
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Try #8: Total reward: 0.8074578128838379
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Try #9: Total reward: 0.6604453634647345
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Try #10: Total reward: 0.6635647841713436
[2m[36m(ServeReplica:RLModel pid=1651263)[0m > Try #11: Total reward: 1.3967598258464

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:14:37,318 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:14:39,458 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1651647)[0m 2023-05-19 13:14:40,651	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1651647)[0m 2023-05-19 13:14:40,651	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1651689)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1651

[2m[36m(ServeReplica:RLModel pid=1651647)[0m > Algorithm PPO with humanoid env for run_param_9 task has been build.
[2m[36m(ServeReplica:RLModel pid=1651647)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1651647)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1651647)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1651647)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '120.0'.
[2m[36m(ServeReplica:RLModel pid=1651647)[0m > Try #1: Total reward: 0.9956647739692879
[2m[36m(ServeReplica:RLModel pid=1651647)[0m > Try #2: Total reward: 1.911027418328977
[2m[36m(ServeReplica:RLModel pid=1651647)[0m > Try #3: Total reward: 0.5266462732977524
[2m[36m(ServeReplica:RLModel pid=1651647)[0m > Try #4: Total reward: 0.9280322345916718
[2m[36m(ServeReplica:RLModel pid=1651647)[0m > Try #5: Total reward: 0.6638248206178193
[2m[36m(ServeReplica:RLModel pid=1651647)[0m > Try #6: Total reward: 1.2462334647558944
[2m[3

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:15:00,411 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:15:02,551 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1652030)[0m 2023-05-19 13:15:03,747	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1652030)[0m 2023-05-19 13:15:03,748	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1652072)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1652

[2m[36m(ServeReplica:RLModel pid=1652030)[0m > Algorithm PPO with humanoid env for run_param_9 task has been build.
[2m[36m(ServeReplica:RLModel pid=1652030)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1652030)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1652030)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1652030)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '120.0'.
[2m[36m(ServeReplica:RLModel pid=1652030)[0m > Try #1: Total reward: 0.38431170600960624
[2m[36m(ServeReplica:RLModel pid=1652030)[0m > Try #2: Total reward: 0.47438226282228596
[2m[36m(ServeReplica:RLModel pid=1652030)[0m > Try #3: Total reward: 1.9900902216088079
[2m[36m(ServeReplica:RLModel pid=1652030)[0m > Try #4: Total reward: 0.1633511210282767
[2m[36m(ServeReplica:RLModel pid=1652030)[0m > Try #5: Total reward: 1.6619839679068962
[2m[36m(ServeReplica:RLModel pid=1652030)[0m > Try #6: Total reward: 0.4559062349353609
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:15:23,500 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:15:25,641 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1652415)[0m 2023-05-19 13:15:26,830	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1652415)[0m 2023-05-19 13:15:26,830	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1652457)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1652

[2m[36m(ServeReplica:RLModel pid=1652415)[0m > Algorithm PPO with humanoid env for run_param_9 task has been build.
[2m[36m(ServeReplica:RLModel pid=1652415)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1652415)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1652415)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1652415)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '120.0'.
[2m[36m(ServeReplica:RLModel pid=1652415)[0m > Try #1: Total reward: 0.8001483872566615
[2m[36m(ServeReplica:RLModel pid=1652415)[0m > Try #2: Total reward: 1.024664031958694
[2m[36m(ServeReplica:RLModel pid=1652415)[0m > Try #3: Total reward: 0.8863287544918594
[2m[36m(ServeReplica:RLModel pid=1652415)[0m > Try #4: Total reward: 1.297491992616207
[2m[36m(ServeReplica:RLModel pid=1652415)[0m > Try #5: Total reward: 1.2102066467332555
[2m[36m(ServeReplica:RLModel pid=1652415)[0m > Try #6: Total reward: 0.5050306611892338
[2m[36

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:15:46,484 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:15:48,624 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1652799)[0m 2023-05-19 13:15:49,821	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1652799)[0m 2023-05-19 13:15:49,822	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1652840)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1652

[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Algorithm PPO with humanoid env for run_param_10 task has been build.
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1652799)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '132.0'.


[2m[36m(ServeReplica:RLModel pid=1652799)[0m 2023-05-19 13:15:53,571	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1652799)[0m 2023-05-19 13:15:53,571	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 492.5820164680481, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1653062)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1653062)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1653062)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1653063)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker p

[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Try #1: Total reward: 0.5649956511478392
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Try #2: Total reward: 1.1159813693987144
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Try #3: Total reward: 0.4587258199062802
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Try #4: Total reward: 0.3127293055901616
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Try #5: Total reward: 0.71879967153384
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Try #6: Total reward: 0.38436641070939565
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Try #7: Total reward: 0.4022605404032104
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Try #8: Total reward: 0.7086564344544075
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Try #9: Total reward: 0.24044649878128974
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Try #10: Total reward: 0.05532119408131376
[2m[36m(ServeReplica:RLModel pid=1652799)[0m > Try #11: Total reward: 1.4329814659721

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:16:09,583 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:16:11,725 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1653184)[0m 2023-05-19 13:16:12,918	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1653184)[0m 2023-05-19 13:16:12,918	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1653223)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1653

[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Algorithm PPO with humanoid env for run_param_10 task has been build.
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1653184)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '132.0'.


[2m[36m(ServeReplica:RLModel pid=1653184)[0m 2023-05-19 13:16:16,645	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1653184)[0m 2023-05-19 13:16:16,645	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 495.82029914855957, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1653446)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1653446)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1653446)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1653447)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Try #1: Total reward: 0.21774120519343054
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Try #2: Total reward: 0.5027533176705229
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Try #3: Total reward: 1.0823938030315556
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Try #4: Total reward: 1.2993050685808707
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Try #5: Total reward: 1.6814481083866544
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Try #6: Total reward: 1.4259934877209461
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Try #7: Total reward: 1.2082158468954556
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Try #8: Total reward: 1.3803209530355316
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Try #9: Total reward: 0.5895567415169022
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Try #10: Total reward: 0.9846245966082912
[2m[36m(ServeReplica:RLModel pid=1653184)[0m > Try #11: Total reward: 1.5983887604115

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:16:32,682 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:16:34,824 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1653568)[0m 2023-05-19 13:16:36,023	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1653568)[0m 2023-05-19 13:16:36,024	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1653611)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1653

[2m[36m(ServeReplica:RLModel pid=1653568)[0m > Algorithm PPO with humanoid env for run_param_10 task has been build.
[2m[36m(ServeReplica:RLModel pid=1653568)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1653568)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1653568)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1653568)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '132.0'.
[2m[36m(ServeReplica:RLModel pid=1653568)[0m > Try #1: Total reward: 1.1763545185799613
[2m[36m(ServeReplica:RLModel pid=1653568)[0m > Try #2: Total reward: 0.5610956086973622
[2m[36m(ServeReplica:RLModel pid=1653568)[0m > Try #3: Total reward: 0.9362104205596935
[2m[36m(ServeReplica:RLModel pid=1653568)[0m > Try #4: Total reward: 0.938194627093786
[2m[36m(ServeReplica:RLModel pid=1653568)[0m > Try #5: Total reward: 1.0343877237762056
[2m[36m(ServeReplica:RLModel pid=1653568)[0m > Try #6: Total reward: 0.6869539258406611
[2m[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:16:55,677 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:16:57,821 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1653955)[0m 2023-05-19 13:16:59,002	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1653955)[0m 2023-05-19 13:16:59,002	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1653990)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1653

[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Algorithm PPO with humanoid env for run_param_10 task has been build.
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1653955)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '132.0'.


[2m[36m(ServeReplica:RLModel pid=1653955)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1653955)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1653955)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Try #1: Total reward: 0.8748785773053899
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Try #2: Total reward: 1.0225576796768978
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Try #3: Total reward: 0.22944027156953095
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Try #4: Total reward: 0.23339453593669107
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Try #5: Total reward: 0.37323164269747316
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Try #6: Total reward: 0.9359273933214091
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Try #7: Total reward: 0.3545467418388522
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Try #8: Total reward: 1.079004900964451
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Try #9: Total reward: 1.0147969321102064
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Try #10: Total reward: 0.8924946900400434
[2m[36m(ServeReplica:RLModel pid=1653955)[0m > Try #11: Total reward: 0.206829397033

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:17:18,768 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:17:20,911 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1654341)[0m 2023-05-19 13:17:22,101	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1654341)[0m 2023-05-19 13:17:22,102	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1654383)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1654

[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Algorithm PPO with humanoid env for run_param_10 task has been build.
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1654341)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '132.0'.


[2m[36m(ServeReplica:RLModel pid=1654341)[0m 2023-05-19 13:17:25,868	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1654341)[0m 2023-05-19 13:17:25,868	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 435.5413944721222, '_episodes_total': 1000}
[2m[36m(ServeReplica:RLModel pid=1654341)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1654341)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1654341)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1654603)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2

[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Try #1: Total reward: 1.363998006222233
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Try #2: Total reward: 1.0955741400573649
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Try #3: Total reward: 1.5106909657013075
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Try #4: Total reward: 0.7815350798105923
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Try #5: Total reward: 0.2091480694201191
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Try #6: Total reward: 1.5610102104441912
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Try #7: Total reward: 0.782511103741993
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Try #8: Total reward: 0.6935810728313926
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Try #9: Total reward: 1.205733056832071
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Try #10: Total reward: 0.537839019258755
[2m[36m(ServeReplica:RLModel pid=1654341)[0m > Try #11: Total reward: 1.929087696243673
[

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:17:41,861 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:17:44,000 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1654726)[0m 2023-05-19 13:17:45,186	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1654726)[0m 2023-05-19 13:17:45,186	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1654761)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1654

[2m[36m(ServeReplica:RLModel pid=1654726)[0m > Algorithm PPO with humanoid env for run_param_11 task has been build.
[2m[36m(ServeReplica:RLModel pid=1654726)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1654726)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1654726)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1654726)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '144.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1654726)[0m > Try #1: Total reward: 0.9168279974293284
[2m[36m(ServeReplica:RLModel pid=1654726)[0m > Try #2: Total reward: 0.6815117940890463
[2m[36m(ServeReplica:RLModel pid=1654726)[0m > Try #3: Total reward: 1.3507284076915609
[2m[36m(ServeReplica:RLModel pid=1654726)[0m > Try #4: Total reward: 0.09234273057117859
[2m[36m(ServeReplica:RLModel pid=1654726)[0m > Try #5: Total reward: 1.9396337392211944
[2m[36m(ServeReplica:RLModel pid=1654726)[0m > Try #6: Total reward: 0.49873789

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:18:04,880 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:18:07,023 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1655119)[0m 2023-05-19 13:18:08,214	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1655119)[0m 2023-05-19 13:18:08,215	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1655154)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1655

[2m[36m(ServeReplica:RLModel pid=1655119)[0m > Algorithm PPO with humanoid env for run_param_11 task has been build.
[2m[36m(ServeReplica:RLModel pid=1655119)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1655119)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1655119)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1655119)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '144.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1655119)[0m > Try #1: Total reward: 0.35251102500000053
[2m[36m(ServeReplica:RLModel pid=1655119)[0m > Try #2: Total reward: 0.06699878263632737
[2m[36m(ServeReplica:RLModel pid=1655119)[0m > Try #3: Total reward: 0.40436521255101837
[2m[36m(ServeReplica:RLModel pid=1655119)[0m > Try #4: Total reward: 0.8537969270007557
[2m[36m(ServeReplica:RLModel pid=1655119)[0m > Try #5: Total reward: 0.8828961102748214
[2m[36m(ServeReplica:RLModel pid=1655119)[0m > Try #6: Total reward: 1.060343

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:18:27,976 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:18:30,116 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1655505)[0m 2023-05-19 13:18:31,295	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1655505)[0m 2023-05-19 13:18:31,295	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1655547)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1655

[2m[36m(ServeReplica:RLModel pid=1655505)[0m > Algorithm PPO with humanoid env for run_param_11 task has been build.
[2m[36m(ServeReplica:RLModel pid=1655505)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1655505)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1655505)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1655505)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '144.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1655505)[0m > Try #1: Total reward: 1.0738994335308976
[2m[36m(ServeReplica:RLModel pid=1655505)[0m > Try #2: Total reward: 0.3899589801747923
[2m[36m(ServeReplica:RLModel pid=1655505)[0m > Try #3: Total reward: 1.4196561378691004
[2m[36m(ServeReplica:RLModel pid=1655505)[0m > Try #4: Total reward: 0.6452774504416883
[2m[36m(ServeReplica:RLModel pid=1655505)[0m > Try #5: Total reward: 0.4249474299769226
[2m[36m(ServeReplica:RLModel pid=1655505)[0m > Try #6: Total reward: 1.768493899

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:18:51,050 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:18:53,192 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1655890)[0m 2023-05-19 13:18:54,375	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1655890)[0m 2023-05-19 13:18:54,375	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1655926)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1655

[2m[36m(ServeReplica:RLModel pid=1655890)[0m > Algorithm PPO with humanoid env for run_param_11 task has been build.
[2m[36m(ServeReplica:RLModel pid=1655890)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1655890)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1655890)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1655890)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '144.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1655890)[0m > Try #1: Total reward: 0.731019220834295
[2m[36m(ServeReplica:RLModel pid=1655890)[0m > Try #2: Total reward: 0.7375891474222763
[2m[36m(ServeReplica:RLModel pid=1655890)[0m > Try #3: Total reward: 0.5471344037715852
[2m[36m(ServeReplica:RLModel pid=1655890)[0m > Try #4: Total reward: 0.5195334034570285
[2m[36m(ServeReplica:RLModel pid=1655890)[0m > Try #5: Total reward: 1.444678467001162
[2m[36m(ServeReplica:RLModel pid=1655890)[0m > Try #6: Total reward: 1.36666783911

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:19:14,123 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:19:16,273 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1656276)[0m 2023-05-19 13:19:17,462	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1656276)[0m 2023-05-19 13:19:17,462	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1656313)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1656

[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Algorithm PPO with humanoid env for run_param_11 task has been build.
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1656276)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '144.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1656276)[0m 2023-05-19 13:19:21,282	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1656276)[0m 2023-05-19 13:19:21,282	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 435.5413944721222, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1656538)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1656538)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1656538)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1656539)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Try #1: Total reward: 1.2508198561951267
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Try #2: Total reward: 0.10853705557879759
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Try #3: Total reward: 0.6651888864781614
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Try #4: Total reward: 0.3338793865905064
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Try #5: Total reward: 1.833987233920713
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Try #6: Total reward: 0.40226672087565646
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Try #7: Total reward: 1.0741409935195119
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Try #8: Total reward: 1.6495609741816373
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Try #9: Total reward: 0.7629315206014368
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Try #10: Total reward: 1.836270676292767
[2m[36m(ServeReplica:RLModel pid=1656276)[0m > Try #11: Total reward: 0.05176941076809

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:19:37,226 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:19:39,367 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1656661)[0m 2023-05-19 13:19:40,563	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1656661)[0m 2023-05-19 13:19:40,564	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1656701)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1656

[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Algorithm PPO with humanoid env for run_param_12 task has been build.
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1656661)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '156.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1656661)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1656661)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1656661)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Try #1: Total reward: 0.5167692303025462
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Try #2: Total reward: 1.5530808500978661
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Try #3: Total reward: 0.5642156941560005
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Try #4: Total reward: 1.3708717534670039
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Try #5: Total reward: 0.16129010087486895
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Try #6: Total reward: 1.1243870144100265
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Try #7: Total reward: 1.0726561611580423
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Try #8: Total reward: 0.756212036235074
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Try #9: Total reward: 0.7106682101143357
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Try #10: Total reward: 0.700503795959943
[2m[36m(ServeReplica:RLModel pid=1656661)[0m > Try #11: Total reward: 0.307769711532435

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:20:00,219 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:20:02,362 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1657048)[0m 2023-05-19 13:20:03,554	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1657048)[0m 2023-05-19 13:20:03,554	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1657082)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1657

[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Algorithm PPO with humanoid env for run_param_12 task has been build.
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1657048)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '156.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1657048)[0m 2023-05-19 13:20:07,264	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1657048)[0m 2023-05-19 13:20:07,264	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 495.82029914855957, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1657310)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1657310)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1657310)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1657309)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Try #1: Total reward: 1.2368835527946063
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Try #2: Total reward: 0.9235552912084507
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Try #3: Total reward: 0.5886726394611894
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Try #4: Total reward: 0.15746641104120127
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Try #5: Total reward: 0.6922278123456839
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Try #6: Total reward: 1.301147842567313
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Try #7: Total reward: 0.9358838565586142
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Try #8: Total reward: 0.6283402981269056
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Try #9: Total reward: 1.6347251191882193
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Try #10: Total reward: 1.7433430860671009
[2m[36m(ServeReplica:RLModel pid=1657048)[0m > Try #11: Total reward: 0.20763423108158

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:20:23,311 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:20:25,455 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1657432)[0m 2023-05-19 13:20:26,646	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1657432)[0m 2023-05-19 13:20:26,646	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1657470)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1657

[2m[36m(ServeReplica:RLModel pid=1657432)[0m > Algorithm PPO with humanoid env for run_param_12 task has been build.
[2m[36m(ServeReplica:RLModel pid=1657432)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1657432)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1657432)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1657432)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '156.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1657432)[0m > Try #1: Total reward: 2.2116563223484795
[2m[36m(ServeReplica:RLModel pid=1657432)[0m > Try #2: Total reward: 0.5456013711649275
[2m[36m(ServeReplica:RLModel pid=1657432)[0m > Try #3: Total reward: 1.557628631606895
[2m[36m(ServeReplica:RLModel pid=1657432)[0m > Try #4: Total reward: 1.6813072160156894
[2m[36m(ServeReplica:RLModel pid=1657432)[0m > Try #5: Total reward: 0.9047978877726894
[2m[36m(ServeReplica:RLModel pid=1657432)[0m > Try #6: Total reward: 1.4754677029

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:20:46,412 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:20:48,552 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1657818)[0m 2023-05-19 13:20:49,739	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1657818)[0m 2023-05-19 13:20:49,740	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1657855)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1657

[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Algorithm PPO with humanoid env for run_param_12 task has been build.
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1657818)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '156.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1657818)[0m 2023-05-19 13:20:53,520	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_15/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1657818)[0m 2023-05-19 13:20:53,520	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 483.58794140815735, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1658081)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1658081)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1658081)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1658080)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Try #1: Total reward: 1.7559789299027886
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Try #2: Total reward: 1.9927835715173514
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Try #3: Total reward: 1.5091341064135497
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Try #4: Total reward: 0.662429063313023
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Try #5: Total reward: 0.8358059972397041
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Try #6: Total reward: 0.9003510718173304
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Try #7: Total reward: 0.27434829324702087
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Try #8: Total reward: 0.4883758000048171
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Try #9: Total reward: 1.1294742647357916
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Try #10: Total reward: 0.033908343716136394
[2m[36m(ServeReplica:RLModel pid=1657818)[0m > Try #11: Total reward: 0.275045348336

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:21:09,501 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:21:11,643 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1658202)[0m 2023-05-19 13:21:12,829	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1658202)[0m 2023-05-19 13:21:12,829	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1658242)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1658

[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Algorithm PPO with humanoid env for run_param_12 task has been build.
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1658202)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '156.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1658202)[0m 2023-05-19 13:21:16,540	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1658202)[0m 2023-05-19 13:21:16,540	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 435.5413944721222, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1658463)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1658463)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1658463)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1658464)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Try #1: Total reward: 0.4708937307817621
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Try #2: Total reward: 0.385265566952182
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Try #3: Total reward: 0.14882202627550878
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Try #4: Total reward: 0.0424425203827427
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Try #5: Total reward: 0.6651355077041124
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Try #6: Total reward: 0.8751279958645302
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Try #7: Total reward: 1.1515107945716354
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Try #8: Total reward: 1.149287798314513
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Try #9: Total reward: 0.47773954061328666
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Try #10: Total reward: 1.7400459563853878
[2m[36m(ServeReplica:RLModel pid=1658202)[0m > Try #11: Total reward: 0.72954556357846

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:21:32,586 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:21:34,727 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1658586)[0m 2023-05-19 13:21:35,921	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1658586)[0m 2023-05-19 13:21:35,921	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1658623)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1658

[2m[36m(ServeReplica:RLModel pid=1658586)[0m > Algorithm PPO with humanoid env for run_param_13 task has been build.
[2m[36m(ServeReplica:RLModel pid=1658586)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1658586)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1658586)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1658586)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '168.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1658586)[0m > Try #1: Total reward: 0.1327918557201189
[2m[36m(ServeReplica:RLModel pid=1658586)[0m > Try #2: Total reward: 0.39228233181541866
[2m[36m(ServeReplica:RLModel pid=1658586)[0m > Try #3: Total reward: 2.274227331468106
[2m[36m(ServeReplica:RLModel pid=1658586)[0m > Try #4: Total reward: 1.5302005722151895
[2m[36m(ServeReplica:RLModel pid=1658586)[0m > Try #5: Total reward: 0.47890229158640985
[2m[36m(ServeReplica:RLModel pid=1658586)[0m > Try #6: Total reward: 1.37119919

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:21:55,575 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:21:57,719 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1658972)[0m 2023-05-19 13:21:58,909	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1658972)[0m 2023-05-19 13:21:58,909	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1659014)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1659

[2m[36m(ServeReplica:RLModel pid=1658972)[0m > Algorithm PPO with humanoid env for run_param_13 task has been build.
[2m[36m(ServeReplica:RLModel pid=1658972)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1658972)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1658972)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1658972)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '168.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1658972)[0m > Try #1: Total reward: 1.6100500274271423
[2m[36m(ServeReplica:RLModel pid=1658972)[0m > Try #2: Total reward: 1.230153293972089
[2m[36m(ServeReplica:RLModel pid=1658972)[0m > Try #3: Total reward: 0.4113802976229913
[2m[36m(ServeReplica:RLModel pid=1658972)[0m > Try #4: Total reward: 0.8658663391432383
[2m[36m(ServeReplica:RLModel pid=1658972)[0m > Try #5: Total reward: 0.19664328228419153
[2m[36m(ServeReplica:RLModel pid=1658972)[0m > Try #6: Total reward: 0.527728686

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:22:18,662 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:22:20,803 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1659358)[0m 2023-05-19 13:22:21,997	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1659358)[0m 2023-05-19 13:22:21,997	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1659399)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1659

[2m[36m(ServeReplica:RLModel pid=1659358)[0m > Algorithm PPO with humanoid env for run_param_13 task has been build.
[2m[36m(ServeReplica:RLModel pid=1659358)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1659358)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1659358)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1659358)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '168.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1659358)[0m > Try #1: Total reward: 0.3410314590044325
[2m[36m(ServeReplica:RLModel pid=1659358)[0m > Try #2: Total reward: 1.5457014484616036
[2m[36m(ServeReplica:RLModel pid=1659358)[0m > Try #3: Total reward: 1.8783874398849965
[2m[36m(ServeReplica:RLModel pid=1659358)[0m > Try #4: Total reward: 0.22769379152995006
[2m[36m(ServeReplica:RLModel pid=1659358)[0m > Try #5: Total reward: 0.6269676872912084
[2m[36m(ServeReplica:RLModel pid=1659358)[0m > Try #6: Total reward: 0.14101638

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:22:41,769 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:22:43,913 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1659743)[0m 2023-05-19 13:22:45,102	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1659743)[0m 2023-05-19 13:22:45,103	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1659781)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1659

[2m[36m(ServeReplica:RLModel pid=1659743)[0m > Algorithm PPO with humanoid env for run_param_13 task has been build.
[2m[36m(ServeReplica:RLModel pid=1659743)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1659743)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1659743)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1659743)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '168.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1659743)[0m > Try #1: Total reward: 1.6390349951559195
[2m[36m(ServeReplica:RLModel pid=1659743)[0m > Try #2: Total reward: 0.9134470845221159
[2m[36m(ServeReplica:RLModel pid=1659743)[0m > Try #3: Total reward: 1.344559998305117
[2m[36m(ServeReplica:RLModel pid=1659743)[0m > Try #4: Total reward: 0.10749572731994375
[2m[36m(ServeReplica:RLModel pid=1659743)[0m > Try #5: Total reward: 1.6176718695996934
[2m[36m(ServeReplica:RLModel pid=1659743)[0m > Try #6: Total reward: 1.037734101

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:23:04,866 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:23:07,007 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1660129)[0m 2023-05-19 13:23:08,199	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1660129)[0m 2023-05-19 13:23:08,200	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1660164)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1660

[2m[36m(ServeReplica:RLModel pid=1660129)[0m > Algorithm PPO with humanoid env for run_param_13 task has been build.
[2m[36m(ServeReplica:RLModel pid=1660129)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1660129)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1660129)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1660129)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '168.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1660129)[0m > Try #1: Total reward: 1.1249464574878278
[2m[36m(ServeReplica:RLModel pid=1660129)[0m > Try #2: Total reward: 1.5470460699894333
[2m[36m(ServeReplica:RLModel pid=1660129)[0m > Try #3: Total reward: 1.4790064292057106
[2m[36m(ServeReplica:RLModel pid=1660129)[0m > Try #4: Total reward: 0.876996424283271
[2m[36m(ServeReplica:RLModel pid=1660129)[0m > Try #5: Total reward: 1.3481546688067674
[2m[36m(ServeReplica:RLModel pid=1660129)[0m > Try #6: Total reward: 0.5439154565

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:23:27,844 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:23:29,982 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1660513)[0m 2023-05-19 13:23:31,174	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1660513)[0m 2023-05-19 13:23:31,174	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1660551)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1660

[2m[36m(ServeReplica:RLModel pid=1660513)[0m > Algorithm PPO with humanoid env for run_param_14 task has been build.
[2m[36m(ServeReplica:RLModel pid=1660513)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1660513)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1660513)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1660513)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '180.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1660513)[0m > Try #1: Total reward: 1.4491377066613353
[2m[36m(ServeReplica:RLModel pid=1660513)[0m > Try #2: Total reward: 0.5507838355339159
[2m[36m(ServeReplica:RLModel pid=1660513)[0m > Try #3: Total reward: 1.8981398389710826
[2m[36m(ServeReplica:RLModel pid=1660513)[0m > Try #4: Total reward: 0.8373691585051618
[2m[36m(ServeReplica:RLModel pid=1660513)[0m > Try #5: Total reward: 0.5066433788359296
[2m[36m(ServeReplica:RLModel pid=1660513)[0m > Try #6: Total reward: 1.422281686

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:23:50,932 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:23:53,073 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1660898)[0m 2023-05-19 13:23:54,272	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1660898)[0m 2023-05-19 13:23:54,273	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1660936)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1660

[2m[36m(ServeReplica:RLModel pid=1660898)[0m > Algorithm PPO with humanoid env for run_param_14 task has been build.
[2m[36m(ServeReplica:RLModel pid=1660898)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1660898)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1660898)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1660898)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '180.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1660898)[0m > Try #1: Total reward: 1.9287516094800397
[2m[36m(ServeReplica:RLModel pid=1660898)[0m > Try #2: Total reward: 1.124149898967865
[2m[36m(ServeReplica:RLModel pid=1660898)[0m > Try #3: Total reward: 0.16451846862737743
[2m[36m(ServeReplica:RLModel pid=1660898)[0m > Try #4: Total reward: 0.27194992284192276
[2m[36m(ServeReplica:RLModel pid=1660898)[0m > Try #5: Total reward: 0.5401845062294525
[2m[36m(ServeReplica:RLModel pid=1660898)[0m > Try #6: Total reward: 1.50633493

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:24:14,021 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:24:16,165 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1661284)[0m 2023-05-19 13:24:17,357	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1661284)[0m 2023-05-19 13:24:17,357	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1661326)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1661

[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Algorithm PPO with humanoid env for run_param_14 task has been build.
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1661284)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '180.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1661284)[0m 2023-05-19 13:24:21,189	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_10/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1661284)[0m 2023-05-19 13:24:21,189	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 491.70674085617065, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1661545)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1661545)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1661545)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1661546)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Try #1: Total reward: 0.4105282475070278
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Try #2: Total reward: 1.1914277538269724
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Try #3: Total reward: 0.7412571623694758
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Try #4: Total reward: 0.11869435106388754
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Try #5: Total reward: 0.7465311937248289
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Try #6: Total reward: 1.7433465076767467
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Try #7: Total reward: 0.6746559328981289
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Try #8: Total reward: 1.227564882027964
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Try #9: Total reward: 1.9337950184178783
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Try #10: Total reward: 2.2974178636440326
[2m[36m(ServeReplica:RLModel pid=1661284)[0m > Try #11: Total reward: 0.82899250351621

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:24:37,114 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:24:39,254 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1661669)[0m 2023-05-19 13:24:40,447	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1661669)[0m 2023-05-19 13:24:40,447	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1661707)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1661

[2m[36m(ServeReplica:RLModel pid=1661669)[0m > Algorithm PPO with humanoid env for run_param_14 task has been build.
[2m[36m(ServeReplica:RLModel pid=1661669)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1661669)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1661669)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1661669)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '180.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1661669)[0m > Try #1: Total reward: 1.0332681895468112
[2m[36m(ServeReplica:RLModel pid=1661669)[0m > Try #2: Total reward: 0.27050539418081887
[2m[36m(ServeReplica:RLModel pid=1661669)[0m > Try #3: Total reward: 0.051716744061461206
[2m[36m(ServeReplica:RLModel pid=1661669)[0m > Try #4: Total reward: 1.543026049585743
[2m[36m(ServeReplica:RLModel pid=1661669)[0m > Try #5: Total reward: 1.2587665929353435
[2m[36m(ServeReplica:RLModel pid=1661669)[0m > Try #6: Total reward: 1.1369584

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:25:00,118 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:25:02,261 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1662052)[0m 2023-05-19 13:25:03,447	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1662052)[0m 2023-05-19 13:25:03,447	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1662088)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1662

[2m[36m(ServeReplica:RLModel pid=1662052)[0m > Algorithm PPO with humanoid env for run_param_14 task has been build.
[2m[36m(ServeReplica:RLModel pid=1662052)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1662052)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1662052)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1662052)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '180.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1662052)[0m > Try #1: Total reward: 1.815693950003277
[2m[36m(ServeReplica:RLModel pid=1662052)[0m > Try #2: Total reward: 0.19076677958853971
[2m[36m(ServeReplica:RLModel pid=1662052)[0m > Try #3: Total reward: 1.7915513896869248
[2m[36m(ServeReplica:RLModel pid=1662052)[0m > Try #4: Total reward: 0.25942980045792124
[2m[36m(ServeReplica:RLModel pid=1662052)[0m > Try #5: Total reward: 0.2860348762468043
[2m[36m(ServeReplica:RLModel pid=1662052)[0m > Try #6: Total reward: 1.17997395

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:25:23,197 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:25:25,339 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1662435)[0m 2023-05-19 13:25:26,527	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1662435)[0m 2023-05-19 13:25:26,527	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1662472)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1662

[2m[36m(ServeReplica:RLModel pid=1662435)[0m > Algorithm PPO with humanoid env for run_param_15 task has been build.
[2m[36m(ServeReplica:RLModel pid=1662435)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1662435)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1662435)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1662435)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '192.0'.
[2m[36m(ServeReplica:RLModel pid=1662435)[0m > Try #1: Total reward: 0.4646627786855358
[2m[36m(ServeReplica:RLModel pid=1662435)[0m > Try #2: Total reward: 1.0381246758152272
[2m[36m(ServeReplica:RLModel pid=1662435)[0m > Try #3: Total reward: 0.35569544289083227
[2m[36m(ServeReplica:RLModel pid=1662435)[0m > Try #4: Total reward: 0.8393325514352376
[2m[36m(ServeReplica:RLModel pid=1662435)[0m > Try #5: Total reward: 1.323407333902057
[2m[36m(ServeReplica:RLModel pid=1662435)[0m > Try #6: Total reward: 0.7069905965097191
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:25:46,286 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:25:48,427 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1662820)[0m 2023-05-19 13:25:49,625	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1662820)[0m 2023-05-19 13:25:49,626	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1662858)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1662

[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Algorithm PPO with humanoid env for run_param_15 task has been build.
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1662820)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '192.0'.


[2m[36m(ServeReplica:RLModel pid=1662820)[0m 2023-05-19 13:25:53,361	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1662820)[0m 2023-05-19 13:25:53,361	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 495.82029914855957, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1663083)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1663083)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1663083)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1663084)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Try #1: Total reward: 1.494532086721133
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Try #2: Total reward: 1.6413486360293896
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Try #3: Total reward: 1.2504996803619957
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Try #4: Total reward: 1.692694990996217
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Try #5: Total reward: 0.2907189868278302
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Try #6: Total reward: 0.09541736594970866
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Try #7: Total reward: 0.9057933742030708
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Try #8: Total reward: 0.534181991073909
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Try #9: Total reward: 1.0931598846392763
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Try #10: Total reward: 1.2654013350853404
[2m[36m(ServeReplica:RLModel pid=1662820)[0m > Try #11: Total reward: 0.4907377976416597

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:26:09,387 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:26:11,528 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1663206)[0m 2023-05-19 13:26:12,726	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1663206)[0m 2023-05-19 13:26:12,726	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1663243)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1663

[2m[36m(ServeReplica:RLModel pid=1663206)[0m > Algorithm PPO with humanoid env for run_param_15 task has been build.
[2m[36m(ServeReplica:RLModel pid=1663206)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1663206)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1663206)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1663206)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '192.0'.
[2m[36m(ServeReplica:RLModel pid=1663206)[0m > Try #1: Total reward: 1.1460679804679637
[2m[36m(ServeReplica:RLModel pid=1663206)[0m > Try #2: Total reward: 1.1331562435574687
[2m[36m(ServeReplica:RLModel pid=1663206)[0m > Try #3: Total reward: 1.21338861153658
[2m[36m(ServeReplica:RLModel pid=1663206)[0m > Try #4: Total reward: 0.23411582813215262
[2m[36m(ServeReplica:RLModel pid=1663206)[0m > Try #5: Total reward: 0.32691707531441877
[2m[36m(ServeReplica:RLModel pid=1663206)[0m > Try #6: Total reward: 1.1769296369399322
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:26:32,378 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:26:34,515 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1663591)[0m 2023-05-19 13:26:35,706	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1663591)[0m 2023-05-19 13:26:35,707	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1663630)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1663

[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Algorithm PPO with humanoid env for run_param_15 task has been build.
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1663591)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '192.0'.


[2m[36m(ServeReplica:RLModel pid=1663591)[0m 2023-05-19 13:26:39,526	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_15/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1663591)[0m 2023-05-19 13:26:39,526	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 483.58794140815735, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1663853)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1663853)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1663853)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1663854)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker

[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Try #1: Total reward: 1.0236933974882958
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Try #2: Total reward: 1.211015019635492
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Try #3: Total reward: 1.4065325002927225
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Try #4: Total reward: 0.5145136816026473
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Try #5: Total reward: 1.6729038371903655
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Try #6: Total reward: 1.9249463899786527
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Try #7: Total reward: 0.8571140309609737
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Try #8: Total reward: 1.7358933711456717
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Try #9: Total reward: 1.2223454123583162
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Try #10: Total reward: 0.9582572072247826
[2m[36m(ServeReplica:RLModel pid=1663591)[0m > Try #11: Total reward: 0.592041205972453

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:26:55,474 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:26:57,617 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1663976)[0m 2023-05-19 13:26:58,810	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1663976)[0m 2023-05-19 13:26:58,810	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1664017)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1664

[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Algorithm PPO with humanoid env for run_param_15 task has been build.
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1663976)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '192.0'.


[2m[36m(ServeReplica:RLModel pid=1663976)[0m 2023-05-19 13:27:02,623	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1663976)[0m 2023-05-19 13:27:02,623	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 435.5413944721222, '_episodes_total': 1000}
[2m[36m(ServeReplica:RLModel pid=1663976)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1663976)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1663976)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1664240)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2

[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Try #1: Total reward: 1.6839876813299444
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Try #2: Total reward: 0.815248372703906
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Try #3: Total reward: 0.9296504701021567
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Try #4: Total reward: 0.46674126858349746
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Try #5: Total reward: 0.7181778605870787
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Try #6: Total reward: 0.31400989497231846
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Try #7: Total reward: 0.9448206881001832
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Try #8: Total reward: 0.14500074452258005
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Try #9: Total reward: 0.6726578920939225
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Try #10: Total reward: 0.6552738699075353
[2m[36m(ServeReplica:RLModel pid=1663976)[0m > Try #11: Total reward: 1.181689990677

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:27:18,574 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:27:20,719 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1664360)[0m 2023-05-19 13:27:21,912	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1664360)[0m 2023-05-19 13:27:21,912	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1664399)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1664

[2m[36m(ServeReplica:RLModel pid=1664360)[0m > Algorithm PPO with humanoid env for run_param_16 task has been build.
[2m[36m(ServeReplica:RLModel pid=1664360)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1664360)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1664360)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1664360)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '204.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1664360)[0m > Try #1: Total reward: 0.016715115863923574
[2m[36m(ServeReplica:RLModel pid=1664360)[0m > Try #2: Total reward: 1.1860178323292065
[2m[36m(ServeReplica:RLModel pid=1664360)[0m > Try #3: Total reward: 1.5660247982513469
[2m[36m(ServeReplica:RLModel pid=1664360)[0m > Try #4: Total reward: 0.3592031188108843
[2m[36m(ServeReplica:RLModel pid=1664360)[0m > Try #5: Total reward: 0.10801259076888758
[2m[36m(ServeReplica:RLModel pid=1664360)[0m > Try #6: Total reward: 0.893268

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:27:41,567 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:27:43,709 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1664746)[0m 2023-05-19 13:27:44,888	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1664746)[0m 2023-05-19 13:27:44,888	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1664786)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1664

[2m[36m(ServeReplica:RLModel pid=1664746)[0m > Algorithm PPO with humanoid env for run_param_16 task has been build.
[2m[36m(ServeReplica:RLModel pid=1664746)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1664746)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1664746)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1664746)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '204.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1664746)[0m > Try #1: Total reward: 0.7991109261621133
[2m[36m(ServeReplica:RLModel pid=1664746)[0m > Try #2: Total reward: 0.8424349237473033
[2m[36m(ServeReplica:RLModel pid=1664746)[0m > Try #3: Total reward: 0.9087558040029116
[2m[36m(ServeReplica:RLModel pid=1664746)[0m > Try #4: Total reward: 1.2120551168970255
[2m[36m(ServeReplica:RLModel pid=1664746)[0m > Try #5: Total reward: 0.8459748595509868
[2m[36m(ServeReplica:RLModel pid=1664746)[0m > Try #6: Total reward: 1.475779478

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:28:04,819 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:28:06,962 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1665136)[0m 2023-05-19 13:28:08,163	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1665136)[0m 2023-05-19 13:28:08,164	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1665180)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1665

[2m[36m(ServeReplica:RLModel pid=1665136)[0m > Algorithm PPO with humanoid env for run_param_16 task has been build.
[2m[36m(ServeReplica:RLModel pid=1665136)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1665136)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1665136)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1665136)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '204.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1665136)[0m > Try #1: Total reward: 0.7083377336234978
[2m[36m(ServeReplica:RLModel pid=1665136)[0m > Try #2: Total reward: 1.3313142018655262
[2m[36m(ServeReplica:RLModel pid=1665136)[0m > Try #3: Total reward: 0.8673586258391913
[2m[36m(ServeReplica:RLModel pid=1665136)[0m > Try #4: Total reward: 0.5768529422295972
[2m[36m(ServeReplica:RLModel pid=1665136)[0m > Try #5: Total reward: 0.5401295102482245
[2m[36m(ServeReplica:RLModel pid=1665136)[0m > Try #6: Total reward: 0.908535341

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:28:27,910 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:28:30,053 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1665523)[0m 2023-05-19 13:28:31,243	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1665523)[0m 2023-05-19 13:28:31,244	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1665564)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1665

[2m[36m(ServeReplica:RLModel pid=1665523)[0m > Algorithm PPO with humanoid env for run_param_16 task has been build.
[2m[36m(ServeReplica:RLModel pid=1665523)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1665523)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1665523)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1665523)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '204.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1665523)[0m > Try #1: Total reward: 1.2953943844706186
[2m[36m(ServeReplica:RLModel pid=1665523)[0m > Try #2: Total reward: 0.5664318635871542
[2m[36m(ServeReplica:RLModel pid=1665523)[0m > Try #3: Total reward: 1.2923242371103554
[2m[36m(ServeReplica:RLModel pid=1665523)[0m > Try #4: Total reward: 0.31407873932600494
[2m[36m(ServeReplica:RLModel pid=1665523)[0m > Try #5: Total reward: 1.4054002894740178
[2m[36m(ServeReplica:RLModel pid=1665523)[0m > Try #6: Total reward: 1.20250201

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:28:51,014 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:28:53,156 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1665910)[0m 2023-05-19 13:28:54,339	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1665910)[0m 2023-05-19 13:28:54,339	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1665947)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1665

[2m[36m(ServeReplica:RLModel pid=1665910)[0m > Algorithm PPO with humanoid env for run_param_16 task has been build.
[2m[36m(ServeReplica:RLModel pid=1665910)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1665910)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1665910)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1665910)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '204.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1665910)[0m > Try #1: Total reward: 1.0736875749875627
[2m[36m(ServeReplica:RLModel pid=1665910)[0m > Try #2: Total reward: 2.13015392364423
[2m[36m(ServeReplica:RLModel pid=1665910)[0m > Try #3: Total reward: 0.24687890268156754
[2m[36m(ServeReplica:RLModel pid=1665910)[0m > Try #4: Total reward: 1.1785755514341205
[2m[36m(ServeReplica:RLModel pid=1665910)[0m > Try #5: Total reward: 0.1096334238589956
[2m[36m(ServeReplica:RLModel pid=1665910)[0m > Try #6: Total reward: 0.9936628950

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:29:14,090 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:29:16,230 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1666295)[0m 2023-05-19 13:29:17,432	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1666295)[0m 2023-05-19 13:29:17,432	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1666335)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1666

[2m[36m(ServeReplica:RLModel pid=1666295)[0m > Algorithm PPO with humanoid env for run_param_17 task has been build.
[2m[36m(ServeReplica:RLModel pid=1666295)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1666295)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1666295)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1666295)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '216.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1666295)[0m > Try #1: Total reward: 1.5821837102532097
[2m[36m(ServeReplica:RLModel pid=1666295)[0m > Try #2: Total reward: 0.30725091179526637
[2m[36m(ServeReplica:RLModel pid=1666295)[0m > Try #3: Total reward: 0.9520143991622437
[2m[36m(ServeReplica:RLModel pid=1666295)[0m > Try #4: Total reward: 0.9268391825615002
[2m[36m(ServeReplica:RLModel pid=1666295)[0m > Try #5: Total reward: 0.5237651948553057
[2m[36m(ServeReplica:RLModel pid=1666295)[0m > Try #6: Total reward: 1.03089499

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:29:37,086 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:29:39,225 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1666678)[0m 2023-05-19 13:29:40,423	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1666678)[0m 2023-05-19 13:29:40,423	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1666719)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1666

[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Algorithm PPO with humanoid env for run_param_17 task has been build.
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1666678)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '216.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1666678)[0m 2023-05-19 13:29:44,195	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1666678)[0m 2023-05-19 13:29:44,195	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 495.82029914855957, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1666940)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1666940)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1666940)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1666939)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Try #1: Total reward: 0.8063446276524372
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Try #2: Total reward: 0.7149246358559582
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Try #3: Total reward: 0.3005919500801805
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Try #4: Total reward: 0.016875119957115957
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Try #5: Total reward: 0.11919285210332121
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Try #6: Total reward: 0.9537453590219969
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Try #7: Total reward: 1.3480084411608726
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Try #8: Total reward: 0.24477266827307168
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Try #9: Total reward: 0.4296172886551394
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Try #10: Total reward: 0.029374918552123312
[2m[36m(ServeReplica:RLModel pid=1666678)[0m > Try #11: Total reward: 0.91270973

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:30:00,178 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:30:02,320 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1667061)[0m 2023-05-19 13:30:03,507	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1667061)[0m 2023-05-19 13:30:03,508	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1667103)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1667

[2m[36m(ServeReplica:RLModel pid=1667061)[0m > Algorithm PPO with humanoid env for run_param_17 task has been build.
[2m[36m(ServeReplica:RLModel pid=1667061)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1667061)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1667061)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1667061)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '216.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1667061)[0m > Try #1: Total reward: 0.8545389280512956
[2m[36m(ServeReplica:RLModel pid=1667061)[0m > Try #2: Total reward: 1.5578503061008926
[2m[36m(ServeReplica:RLModel pid=1667061)[0m > Try #3: Total reward: 0.3355888468115082
[2m[36m(ServeReplica:RLModel pid=1667061)[0m > Try #4: Total reward: 1.7921573239388604
[2m[36m(ServeReplica:RLModel pid=1667061)[0m > Try #5: Total reward: 1.5206967349700133
[2m[36m(ServeReplica:RLModel pid=1667061)[0m > Try #6: Total reward: 0.330587576

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:30:23,274 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:30:25,418 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1667447)[0m 2023-05-19 13:30:26,604	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1667447)[0m 2023-05-19 13:30:26,605	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1667486)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1667

[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Algorithm PPO with humanoid env for run_param_17 task has been build.
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1667447)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '216.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1667447)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1667447)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1667447)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Try #1: Total reward: 0.13882711656533228
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Try #2: Total reward: 1.5956458009417849
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Try #3: Total reward: 1.4539157142619366
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Try #4: Total reward: 0.7015856823407797
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Try #5: Total reward: 0.8598895292871569
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Try #6: Total reward: 0.9826662801207314
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Try #7: Total reward: 0.7174109749520957
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Try #8: Total reward: 0.9894864184620813
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Try #9: Total reward: 0.8439208262963626
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Try #10: Total reward: 0.5886831817718116
[2m[36m(ServeReplica:RLModel pid=1667447)[0m > Try #11: Total reward: 0.1759349141490

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:30:46,362 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:30:48,505 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1667833)[0m 2023-05-19 13:30:49,692	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1667833)[0m 2023-05-19 13:30:49,692	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1667869)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1667

[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Algorithm PPO with humanoid env for run_param_17 task has been build.
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1667833)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '216.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1667833)[0m 2023-05-19 13:30:53,463	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1667833)[0m 2023-05-19 13:30:53,463	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 435.5413944721222, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1668094)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1668094)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1668094)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1668095)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Try #1: Total reward: 0.8523612303765588
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Try #2: Total reward: 0.47484337023852113
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Try #3: Total reward: 0.8552130264186568
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Try #4: Total reward: 1.1295858263820535
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Try #5: Total reward: 0.43237613144718806
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Try #6: Total reward: 1.2638469605806306
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Try #7: Total reward: 0.6915677944726486
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Try #8: Total reward: 1.0932492636150666
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Try #9: Total reward: 1.8395901930102645
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Try #10: Total reward: 0.2551558577138095
[2m[36m(ServeReplica:RLModel pid=1667833)[0m > Try #11: Total reward: 0.331790056608

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:31:09,339 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:31:11,477 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1668217)[0m 2023-05-19 13:31:12,671	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1668217)[0m 2023-05-19 13:31:12,671	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1668253)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1668

[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Algorithm PPO with humanoid env for run_param_18 task has been build.
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1668217)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '228.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1668217)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1668217)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1668217)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Try #1: Total reward: 1.1986719193798459
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Try #2: Total reward: 0.9688160617973449
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Try #3: Total reward: 0.41065971154220166
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Try #4: Total reward: 0.6768361841074042
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Try #5: Total reward: 1.0909381139140397
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Try #6: Total reward: 1.901285662379528
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Try #7: Total reward: 1.0427568445141322
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Try #8: Total reward: 1.3937154550709947
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Try #9: Total reward: 1.0873784266149602
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Try #10: Total reward: 0.663063622196711
[2m[36m(ServeReplica:RLModel pid=1668217)[0m > Try #11: Total reward: 0.053648393099610

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:31:32,422 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:31:34,564 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1668600)[0m 2023-05-19 13:31:35,762	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1668600)[0m 2023-05-19 13:31:35,762	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1668636)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1668

[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Algorithm PPO with humanoid env for run_param_18 task has been build.
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1668600)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '228.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1668600)[0m 2023-05-19 13:31:39,482	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_5/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1668600)[0m 2023-05-19 13:31:39,482	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 495.82029914855957, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1668859)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1668859)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1668859)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1668860)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Try #1: Total reward: 1.6777364904880208
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Try #2: Total reward: 0.5686390890080181
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Try #3: Total reward: 1.783787473664707
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Try #4: Total reward: 1.6730731280271212
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Try #5: Total reward: 0.30122651828831754
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Try #6: Total reward: 1.5607038236263915
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Try #7: Total reward: 1.5406637684517346
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Try #8: Total reward: 1.1200111487537974
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Try #9: Total reward: 0.8086556168262596
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Try #10: Total reward: 0.8244933491197984
[2m[36m(ServeReplica:RLModel pid=1668600)[0m > Try #11: Total reward: 0.21071031482214

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:31:55,512 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:31:57,650 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1668981)[0m 2023-05-19 13:31:58,838	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1668981)[0m 2023-05-19 13:31:58,839	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1669021)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1669

[2m[36m(ServeReplica:RLModel pid=1668981)[0m > Algorithm PPO with humanoid env for run_param_18 task has been build.
[2m[36m(ServeReplica:RLModel pid=1668981)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1668981)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1668981)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1668981)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '228.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1668981)[0m > Try #1: Total reward: 0.0953098035246437
[2m[36m(ServeReplica:RLModel pid=1668981)[0m > Try #2: Total reward: 0.337256471931573
[2m[36m(ServeReplica:RLModel pid=1668981)[0m > Try #3: Total reward: 1.1779182754649544
[2m[36m(ServeReplica:RLModel pid=1668981)[0m > Try #4: Total reward: 0.7593286750223267
[2m[36m(ServeReplica:RLModel pid=1668981)[0m > Try #5: Total reward: 0.3199413698062101
[2m[36m(ServeReplica:RLModel pid=1668981)[0m > Try #6: Total reward: 1.2046553257

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:32:18,612 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:32:20,753 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1669364)[0m 2023-05-19 13:32:21,949	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1669364)[0m 2023-05-19 13:32:21,949	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1669399)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1669

[2m[36m(ServeReplica:RLModel pid=1669364)[0m > Algorithm PPO with humanoid env for run_param_18 task has been build.
[2m[36m(ServeReplica:RLModel pid=1669364)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1669364)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1669364)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1669364)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '228.00000000000003'.
[2m[36m(ServeReplica:RLModel pid=1669364)[0m > Try #1: Total reward: 1.6312224503011736
[2m[36m(ServeReplica:RLModel pid=1669364)[0m > Try #2: Total reward: 0.5531403507526425
[2m[36m(ServeReplica:RLModel pid=1669364)[0m > Try #3: Total reward: 0.1597011801186948
[2m[36m(ServeReplica:RLModel pid=1669364)[0m > Try #4: Total reward: 1.226675486993964
[2m[36m(ServeReplica:RLModel pid=1669364)[0m > Try #5: Total reward: 0.2227953215440767
[2m[36m(ServeReplica:RLModel pid=1669364)[0m > Try #6: Total reward: 1.9983194896

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:32:41,696 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:32:43,837 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1669750)[0m 2023-05-19 13:32:45,030	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1669750)[0m 2023-05-19 13:32:45,031	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1669786)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1669

[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Algorithm PPO with humanoid env for run_param_18 task has been build.
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1669750)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '228.00000000000003'.


[2m[36m(ServeReplica:RLModel pid=1669750)[0m 2023-05-19 13:32:48,738	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1669750)[0m 2023-05-19 13:32:48,738	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 435.5413944721222, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1670009)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1670009)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1670009)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1670010)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Try #1: Total reward: 0.2336816078093444
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Try #2: Total reward: 0.5828230317872545
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Try #3: Total reward: 0.8396753468790586
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Try #4: Total reward: 0.6239186442267415
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Try #5: Total reward: 0.18141000207882682
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Try #6: Total reward: 1.1120728899812726
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Try #7: Total reward: 1.656083778315345
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Try #8: Total reward: 1.6079858520532184
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Try #9: Total reward: 0.7514721753385163
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Try #10: Total reward: 1.1893143251904252
[2m[36m(ServeReplica:RLModel pid=1669750)[0m > Try #11: Total reward: 0.75720075420772

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:33:04,693 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:33:06,830 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1670135)[0m 2023-05-19 13:33:08,018	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1670135)[0m 2023-05-19 13:33:08,019	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1670175)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1670

[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Algorithm PPO with humanoid env for run_param_19 task has been build.
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1670135)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '240.0'.


[2m[36m(ServeReplica:RLModel pid=1670135)[0m 2023-05-19 13:33:11,816	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_0/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1670135)[0m 2023-05-19 13:33:11,816	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 492.5820164680481, '_episodes_total': 1000}
[2m[36m(ServeReplica:RLModel pid=1670135)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1670135)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1670135)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1670394)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m

[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Try #1: Total reward: 1.1834878506036668
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Try #2: Total reward: 0.11845095081578827
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Try #3: Total reward: 0.6311083124587105
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Try #4: Total reward: 0.5802559734441194
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Try #5: Total reward: 0.07307591465749476
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Try #6: Total reward: 1.606909196989003
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Try #7: Total reward: 0.23687456869949478
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Try #8: Total reward: 0.21034871616784773
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Try #9: Total reward: 0.3117543854129261
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Try #10: Total reward: 0.6270109677744594
[2m[36m(ServeReplica:RLModel pid=1670135)[0m > Try #11: Total reward: 1.82406806588

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:33:27,790 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:33:29,932 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1670517)[0m 2023-05-19 13:33:31,139	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1670517)[0m 2023-05-19 13:33:31,139	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1670552)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1670

[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Algorithm PPO with humanoid env for run_param_19 task has been build.
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1670517)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '240.0'.


[2m[36m(ServeReplica:RLModel pid=1670517)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(ServeReplica:RLModel pid=1670517)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(ServeReplica:RLModel pid=1670517)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Try #1: Total reward: 1.7209896862784675
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Try #2: Total reward: 0.4445612673681562
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Try #3: Total reward: 1.0598747929804888
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Try #4: Total reward: 0.3603232265324994
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Try #5: Total reward: 0.12724151143388404
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Try #6: Total reward: 1.6049418284332477
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Try #7: Total reward: 2.003895619137794
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Try #8: Total reward: 1.4609469495889793
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Try #9: Total reward: 1.2910450483181264
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Try #10: Total reward: 1.2286392602143181
[2m[36m(ServeReplica:RLModel pid=1670517)[0m > Try #11: Total reward: 0.10700915389372

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:33:50,890 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:33:53,039 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1670901)[0m 2023-05-19 13:33:54,230	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1670901)[0m 2023-05-19 13:33:54,231	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1670943)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1670

[2m[36m(ServeReplica:RLModel pid=1670901)[0m > Algorithm PPO with humanoid env for run_param_19 task has been build.
[2m[36m(ServeReplica:RLModel pid=1670901)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1670901)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1670901)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1670901)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '240.0'.
[2m[36m(ServeReplica:RLModel pid=1670901)[0m > Try #1: Total reward: 0.934432305542791
[2m[36m(ServeReplica:RLModel pid=1670901)[0m > Try #2: Total reward: 0.04239595581699197
[2m[36m(ServeReplica:RLModel pid=1670901)[0m > Try #3: Total reward: 0.09024986318619298
[2m[36m(ServeReplica:RLModel pid=1670901)[0m > Try #4: Total reward: 1.6929717691603177
[2m[36m(ServeReplica:RLModel pid=1670901)[0m > Try #5: Total reward: 1.0837219944106384
[2m[36m(ServeReplica:RLModel pid=1670901)[0m > Try #6: Total reward: 0.5162952126695555
[2m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:34:13,889 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:34:16,028 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1671285)[0m 2023-05-19 13:34:17,229	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1671285)[0m 2023-05-19 13:34:17,229	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1671324)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1671

[2m[36m(ServeReplica:RLModel pid=1671285)[0m > Algorithm PPO with humanoid env for run_param_19 task has been build.
[2m[36m(ServeReplica:RLModel pid=1671285)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1671285)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1671285)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1671285)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '240.0'.
[2m[36m(ServeReplica:RLModel pid=1671285)[0m > Try #1: Total reward: 0.532663130077272
[2m[36m(ServeReplica:RLModel pid=1671285)[0m > Try #2: Total reward: 1.0417943986315013
[2m[36m(ServeReplica:RLModel pid=1671285)[0m > Try #3: Total reward: 0.3355095485903095
[2m[36m(ServeReplica:RLModel pid=1671285)[0m > Try #4: Total reward: 0.5784688514598948
[2m[36m(ServeReplica:RLModel pid=1671285)[0m > Try #5: Total reward: 1.1022284178048
[2m[36m(ServeReplica:RLModel pid=1671285)[0m > Try #6: Total reward: 0.2186872615734375
[2m[36m

[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:34:36,970 controller 1594951 deployment_state.py:1214 - Stopping 1 replicas of deployment 'RLModel' with outdated versions.
[2m[36m(ServeController pid=1594951)[0m INFO 2023-05-19 13:34:39,112 controller 1594951 deployment_state.py:1310 - Adding 1 replica to deployment 'RLModel'.
[2m[36m(ServeReplica:RLModel pid=1671671)[0m 2023-05-19 13:34:40,303	INFO ppo.py:379 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(ServeReplica:RLModel pid=1671671)[0m 2023-05-19 13:34:40,303	INFO algorithm.py:457 -- Current log_level is ERROR. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(RolloutWorker pid=1671711)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1671

[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Algorithm PPO with humanoid env for run_param_19 task has been build.
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Checkpoint loaded.
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Preparing CSV file.
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Evaluating...
[2m[36m(ServeReplica:RLModel pid=1671671)[0m [dmc_custom_envs][DEBUG] File: humanoid.xml | Replacing '$GEAR' with '240.0'.


[2m[36m(ServeReplica:RLModel pid=1671671)[0m 2023-05-19 13:34:44,110	INFO trainable.py:766 -- Restored on 149.156.105.73 from checkpoint: /mnt/ws/eval_workdir_checkpoints/PPO/humanoid-run_20/final_checkpoint/checkpoint_000125
[2m[36m(ServeReplica:RLModel pid=1671671)[0m 2023-05-19 13:34:44,110	INFO trainable.py:775 -- Current state after restoring: {'_iteration': 125, '_timesteps_total': None, '_time_total': 435.5413944721222, '_episodes_total': 1000}
[2m[36m(RolloutWorker pid=1671934)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker pid=1671934)[0m   dim = np.int(np.prod(s.shape))
[2m[36m(RolloutWorker pid=1671934)[0m   logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
[2m[36m(RolloutWorker pid=1671935)[0m Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
[2m[36m(RolloutWorker 

[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Try #1: Total reward: 0.018327385165929914
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Try #2: Total reward: 1.1653720087497321
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Try #3: Total reward: 0.3055889193093702
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Try #4: Total reward: 0.4267303270994162
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Try #5: Total reward: 1.4519488753265384
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Try #6: Total reward: 1.4050160969603147
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Try #7: Total reward: 0.6633946574858801
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Try #8: Total reward: 0.8611501721953178
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Try #9: Total reward: 0.19943236278226706
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Try #10: Total reward: 0.938814427098704
[2m[36m(ServeReplica:RLModel pid=1671671)[0m > Try #11: Total reward: 1.140012477297