In [1]:
import argparse
import os
import random
import time
from distutils.util import strtobool

import gym
import isaacgym  # noqa
import isaacgymenvs
import torch
import torch.nn as nn
import numpy as np
from custom_layers import BayesianLinear
from ppo_continuous_action_isaacgym import layer_init
from torch.distributions.normal import Normal
from copy import deepcopy

def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
    """Initialise ``layer`` in place with orthogonal weights and a constant bias.

    Note: this definition rebinds the ``layer_init`` name that is also imported
    from ``ppo_continuous_action_isaacgym`` in the import section.

    Args:
        layer: Module exposing ``weight`` and ``bias`` tensors (e.g. ``nn.Linear``).
        std: Gain handed to the orthogonal initialiser.
        bias_const: Value written to every bias entry.

    Returns:
        The very same ``layer`` object, so the call can be nested inside a
        network definition.
    """
    init = torch.nn.init
    init.orthogonal_(layer.weight, gain=std)
    init.constant_(layer.bias, val=bias_const)
    return layer

class Agent(nn.Module):
    """Deterministic actor-critic agent with a diagonal Gaussian policy.

    Two separate 256-256 Tanh MLPs are built: ``critic`` maps an observation to
    a single value and ``actor_mean`` maps it to the action mean. The policy's
    log standard deviation is a learnable parameter that does not depend on the
    observation.

    Args:
        envs: Object exposing ``single_observation_space.shape`` and
            ``single_action_space.shape``; their products set the input and
            output widths.
    """

    def __init__(self, envs):
        super().__init__()
        obs_dim = np.array(envs.single_observation_space.shape).prod()
        act_dim = np.prod(envs.single_action_space.shape)

        # Critic is created first, then the actor, then the log-std parameter.
        value_layers = [
            layer_init(nn.Linear(obs_dim, 256)),
            nn.Tanh(),
            layer_init(nn.Linear(256, 256)),
            nn.Tanh(),
            layer_init(nn.Linear(256, 1), std=1.0),
        ]
        self.critic = nn.Sequential(*value_layers)

        policy_layers = [
            layer_init(nn.Linear(obs_dim, 256)),
            nn.Tanh(),
            layer_init(nn.Linear(256, 256)),
            nn.Tanh(),
            layer_init(nn.Linear(256, act_dim), std=0.01),
        ]
        self.actor_mean = nn.Sequential(*policy_layers)

        self.actor_logstd = nn.Parameter(torch.zeros(1, act_dim))

    def get_value(self, x):
        """Return the critic's output for observations ``x``."""
        return self.critic(x)

    def get_action_and_value(self, x, action=None):
        """Evaluate the policy and the critic on observations ``x``.

        Args:
            x: Observation batch fed to both networks.
            action: Optional actions to score; when ``None`` an action is
                sampled from the policy distribution.

        Returns:
            Tuple ``(action, log_prob, entropy, value)`` where ``log_prob`` and
            ``entropy`` are summed over dimension 1.
        """
        mean = self.actor_mean(x)
        std = self.actor_logstd.expand_as(mean).exp()
        dist = Normal(mean, std)
        if action is None:
            action = dist.sample()
        log_prob = dist.log_prob(action).sum(1)
        entropy = dist.entropy().sum(1)
        value = self.critic(x)
        return action, log_prob, entropy, value

class SampledAgent(Agent):
    """``Agent`` whose networks are supplied by the caller.

    The parent constructor runs first (building its default networks from
    ``envs``), after which ``critic``, ``actor_mean`` and ``actor_logstd`` are
    replaced by the objects passed in.

    Args:
        envs: Forwarded unchanged to ``Agent.__init__``.
        critic: Module installed as ``self.critic``.
        actor_mean: Module installed as ``self.actor_mean``.
        actor_logstd: Value installed as ``self.actor_logstd``.
    """

    def __init__(self, envs, critic, actor_mean, actor_logstd):
        super().__init__(envs=envs)
        overrides = (
            ("critic", critic),
            ("actor_mean", actor_mean),
            ("actor_logstd", actor_logstd),
        )
        for attr_name, replacement in overrides:
            setattr(self, attr_name, replacement)

class BayesianAgent(nn.Module):
    """Actor-critic agent whose hidden layers are ``BayesianLinear`` modules.

    The layout mirrors the deterministic ``Agent``: two 256-256 Tanh MLPs
    (``critic`` and ``actor_mean``) plus a learnable, observation-independent
    ``actor_logstd``. Only the hidden layers are Bayesian; both output heads
    are ordinary ``nn.Linear`` layers initialised with ``layer_init``.

    Args:
        envs: Object exposing ``single_observation_space.shape`` and
            ``single_action_space.shape``. It is also stored on the instance so
            that sampled agents can be built from it.
        number_of_cell_types: Forwarded to every ``BayesianLinear`` as both
            ``neuron_types_in`` and ``neuron_types_out``.
    """

    def __init__(self, envs, number_of_cell_types):
        super().__init__()
        obs_dim = np.array(envs.single_observation_space.shape).prod()
        act_dim = np.prod(envs.single_action_space.shape)

        self.critic = nn.Sequential(
            BayesianLinear(obs_dim, 256, neuron_types_in=number_of_cell_types, neuron_types_out=number_of_cell_types),
            nn.Tanh(),
            BayesianLinear(256, 256, neuron_types_in=number_of_cell_types, neuron_types_out=number_of_cell_types),
            nn.Tanh(),
            # Deterministic value head (a BayesianLinear head is currently disabled).
            layer_init(nn.Linear(256, 1), std=1.0),
        )
        self.actor_mean = nn.Sequential(
            BayesianLinear(obs_dim, 256, neuron_types_in=number_of_cell_types, neuron_types_out=number_of_cell_types),
            nn.Tanh(),
            BayesianLinear(256, 256, neuron_types_in=number_of_cell_types, neuron_types_out=number_of_cell_types),
            nn.Tanh(),
            # Deterministic action-mean head (a BayesianLinear head is currently disabled).
            layer_init(nn.Linear(256, act_dim), std=0.01),
        )
        self.actor_logstd = nn.Parameter(torch.zeros(1, act_dim))
        self.envs = envs

    def get_value(self, x):
        """Return the critic's output for observations ``x``."""
        return self.critic(x)

    def get_action_and_value(self, x, action=None):
        """Evaluate the policy and the critic on observations ``x``.

        Args:
            x: Observation batch fed to both networks.
            action: Optional actions to score; when ``None`` an action is
                sampled from the policy distribution.

        Returns:
            Tuple ``(action, log_prob, entropy, value)`` where ``log_prob`` and
            ``entropy`` are summed over dimension 1.
        """
        action_mean = self.actor_mean(x)
        action_logstd = self.actor_logstd.expand_as(action_mean)
        action_std = torch.exp(action_logstd)
        probs = Normal(action_mean, action_std)
        if action is None:
            action = probs.sample()
        return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(x)

    def construct_vanilla_layer(self, weights, biases):
        """Build an ``nn.Linear`` that carries the given weights and biases.

        Args:
            weights: 2-D tensor; ``shape[1]`` is used as ``in_features`` and
                ``shape[0]`` as ``out_features``.
            biases: Tensor installed as the layer's bias data.
                # assumes length == weights.shape[0] — TODO confirm against BayesianLinear

        Returns:
            A new ``nn.Linear`` whose ``weight.data`` / ``bias.data`` are the
            supplied tensors (assigned directly, not copied).
        """
        layer = nn.Linear(weights.shape[1], weights.shape[0], bias=True)
        layer.weight.data = weights
        layer.bias.data = biases
        return layer

    def _sample_vanilla_sequential(self, network):
        """Return a deterministic ``nn.Sequential`` mirroring ``network``.

        Each ``BayesianLinear`` is replaced by an ``nn.Linear`` built from one
        draw of its ``weight_sampler`` and ``bias_sampler``. Every other module
        (activations, plain linear heads, ...) is deep-copied in its original
        position, so the layer order of the sampled network always matches the
        source network, regardless of which layers are Bayesian.
        """
        sampled_layers = []
        for layer in network:
            if isinstance(layer, BayesianLinear):
                sampled_layers.append(
                    self.construct_vanilla_layer(
                        layer.weight_sampler.sample(),
                        layer.bias_sampler.sample(),
                    )
                )
            else:
                sampled_layers.append(deepcopy(layer))
        return nn.Sequential(*sampled_layers)

    def sample_vanilla_agent(
        self
    ):
        """Draw one set of weights and return it as a deterministic ``SampledAgent``.

        The actor is sampled before the critic. ``actor_logstd`` is deep-copied,
        so the returned agent does not share that parameter with this one.
        """
        actor_mean = self._sample_vanilla_sequential(self.actor_mean)
        critic = self._sample_vanilla_sequential(self.critic)
        actor_logstd = deepcopy(self.actor_logstd)
        return SampledAgent(self.envs, critic, actor_mean, actor_logstd)

Importing module 'gym_38' (/home/amavorpa/isaacgym/python/isaacgym/_bindings/linux-x86_64/gym_38.so)
Setting GYM_USD_PLUG_INFO_PATH to /home/amavorpa/isaacgym/python/isaacgym/_bindings/linux-x86_64/usd/plugInfo.json


In [2]:
from ppo_continuous_action_isaacgym import ExtractObsWrapper, RecordEpisodeStatisticsTorch

# Only request a viewer when a GPU is present AND a display is available.
# Asking for a viewer (headless=False) where no window can be opened makes
# Isaac Gym's GLFW viewer creation fail, so fall back to headless otherwise.
has_display = bool(os.environ.get("DISPLAY"))
headless = not (torch.cuda.is_available() and has_display)

envs = isaacgymenvs.make(
    seed=1,
    task="Anymal",
    num_envs=4096 * 2,
    sim_device="cuda:0",
    rl_device="cuda:0",
    graphics_device_id=0,
    headless=headless,
    multi_gpu=False,
    virtual_screen_capture=False,
    force_render=False)

device = torch.device("cuda")
envs = ExtractObsWrapper(envs)
envs = RecordEpisodeStatisticsTorch(envs, device)
# The vectorised env exposes only batched spaces; the agents read the
# ``single_*`` attributes, so alias them here.
envs.single_action_space = envs.action_space
envs.single_observation_space = envs.observation_space
assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"
agent = BayesianAgent(envs=envs, number_of_cell_types=256).to(device)

# Draw one deterministic network from the Bayesian agent.
sampled_agent = agent.sample_vanilla_agent()

PyTorch version 2.1.1+cu121
Device count 2
/home/amavorpa/isaacgym/python/isaacgym/_bindings/src/gymtorch


Using /home/amavorpa/.cache/torch_extensions/py38_cu121 as PyTorch extensions root...
Emitting ninja build file /home/amavorpa/.cache/torch_extensions/py38_cu121/gymtorch/build.ninja...
Building extension module gymtorch...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)


ninja: no work to do.


Loading extension module gymtorch...
2023-11-30 18:13:57,990 - INFO - logger - logger initialized
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  def get_axis_params(value, axis_idx, x_value=0., dtype=np.float, n_dims=3):
  from collections import Mapping
  from collections import Mapping, Set, Iterable


Error: FBX library failed to load - importing FBX data will not succeed. Message: No module named 'fbx'
FBX tools must be installed from https://help.autodesk.com/view/FBX/2020/ENU/?guid=FBX_Developer_Help_scripting_with_python_fbx_installing_python_fbx_html


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  (np.int, "int"), (np.int8, "int"),
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  with initialize(config_path="./cfg"):
  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
  asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE


Not connected to PVD
+++ Using GPU PhysX
Physics Engine: PhysX
Physics Device: cuda:0
GPU Pipeline: enabled


[Error] [carb.windowing-glfw.plugin] GLFW initialization failed.
[Error] [carb.windowing-glfw.plugin] GLFW window creation failed!
[Error] [carb.gym.plugin] Failed to create Window in CreateGymViewerInternal


In [None]:
# NOTE(review): this cell repeats the environment/agent setup of the preceding
# cell and rebinds `envs`, `agent` and `sampled_agent`. Creating a second
# Isaac Gym simulation in the same kernel may not be supported — confirm
# before running it after the previous cell.
from ppo_continuous_action_isaacgym import ExtractObsWrapper, RecordEpisodeStatisticsTorch

# Only request a viewer when a GPU is present AND a display is available.
# Asking for a viewer (headless=False) where no window can be opened makes
# Isaac Gym's GLFW viewer creation fail, so fall back to headless otherwise.
has_display = bool(os.environ.get("DISPLAY"))
headless = not (torch.cuda.is_available() and has_display)

envs = isaacgymenvs.make(
    seed=1,
    task="Anymal",
    num_envs=4096 * 2,
    sim_device="cuda:0",
    rl_device="cuda:0",
    graphics_device_id=0,
    headless=headless,
    multi_gpu=False,
    virtual_screen_capture=False,
    force_render=False)

device = torch.device("cuda")
envs = ExtractObsWrapper(envs)
envs = RecordEpisodeStatisticsTorch(envs, device)
# The vectorised env exposes only batched spaces; the agents read the
# ``single_*`` attributes, so alias them here.
envs.single_action_space = envs.action_space
envs.single_observation_space = envs.observation_space
assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"
agent = BayesianAgent(envs=envs, number_of_cell_types=256).to(device)

# Draw one deterministic network from the Bayesian agent.
sampled_agent = agent.sample_vanilla_agent()