In [1]:
from MuJoCo_Gym.mujoco_rl import MuJoCo_RL
import numpy as np
import random

In [2]:
class Language():
    """Environment dynamic that relays one agent's discrete utterance to another.

    Each call to `dynamic` records the calling agent's utterance in the shared
    environment dataStore and returns the most recent utterance of the first
    *other* agent (in `mujoco_gym.agents` order) as the observation.

    Attributes:
        mujoco_gym: The environment object; must expose `dataStore` (indexable
            by agent name) and `agents` (iterable of agent names).
        observation_space: Bounds of the single observation value (0 to 3).
        action_space: Bounds of the single action value (0 to 3).
        dataStore: Local store kept for data that must persist across timesteps.
    """

    def __init__(self, mujoco_gym):
        self.mujoco_gym = mujoco_gym
        self.observation_space = {"low":[0], "high":[3]}
        self.action_space = {"low":[0], "high":[3]}
        # The datastore is used to store and preserve data over one or multiple timesteps
        self.dataStore = {}

    def dynamic(self, agent, actions):
        """Store `agent`'s utterance and return what the other agent last said.

        Args:
            agent: Name of the agent that is currently acting.
            actions: Indexable action values; `actions[0]` is truncated to an
                int and used as the utterance.

        Returns:
            A tuple `(0, observation)`. The first element is always 0
            (presumably this dynamic's reward contribution - confirm against
            MuJoCo_RL). `observation` is a one-element numpy array holding the
            other agent's stored utterance, or 0 if that agent has not spoken
            yet or if there is no other agent at all.
        """
        # Extract the utterance from the agent's action and publish it in the
        # environment-wide dataStore so other agents can read it.
        utterance = int(actions[0])
        self.mujoco_gym.dataStore[agent]["utterance"] = utterance

        # Only the first other agent is listened to, even if more exist.
        otherAgents = [other for other in self.mujoco_gym.agents if other != agent]
        if not otherAgents:
            # Single-agent setup: nobody to listen to, so report silence
            # instead of failing with an IndexError.
            return 0, np.array([0])
        otherAgent = otherAgents[0]

        # Check whether the other agent has "spoken" yet (not at timestep 0)
        if "utterance" in self.mujoco_gym.dataStore[otherAgent]:
            utteranceOtherAgent = self.mujoco_gym.dataStore[otherAgent]["utterance"]
            return 0, np.array([utteranceOtherAgent])
        return 0, np.array([0])

In [3]:
def reward_function(mujoco_gym, agent):
    """Reward an agent for reducing its distance to a randomly assigned target.

    On the first call for a given agent, a target is drawn at random from the
    objects tagged "target", the starting distance is recorded, and the reward
    is 0. On every later call the reward is ten times the decrease in distance
    since the previous call (negative when the agent moved away).

    Args:
        mujoco_gym: The environment; must expose `dataStore`, `filterByTag`
            and `distance`.
        agent: Name of the agent to compute the reward for.

    Returns:
        The shaped reward for this timestep.

    Raises:
        ValueError: If the environment contains no objects tagged "target".
    """
    agent_store = mujoco_gym.dataStore[agent]

    # Initialise per agent. The guard must test a key that lives in the
    # per-agent store: "targets" is written to the top-level dataStore, so
    # checking for it here would re-initialise (and re-draw the target) on
    # every single step and the reward would always be 0.
    if "current_target" not in agent_store:
        targets = mujoco_gym.filterByTag("target")
        if len(targets) == 0:
            raise ValueError("reward_function: no objects tagged 'target' found in the environment")
        mujoco_gym.dataStore["targets"] = targets
        agent_store["current_target"] = random.choice(targets)["name"]
        agent_store["distance"] = mujoco_gym.distance(agent, agent_store["current_target"])
        return 0

    # Progress towards the target since the last call; remember the new distance.
    distance = mujoco_gym.distance(agent, agent_store["current_target"])
    progress = agent_store["distance"] - distance
    agent_store["distance"] = distance
    return progress * 10

In [4]:
def done_function(mujoco_gym, agent):
    """Report whether `agent` has reached its target.

    Reads the agent's stored "distance" from the environment dataStore and
    returns True when it is at most 1, otherwise False.
    """
    remaining = mujoco_gym.dataStore[agent]["distance"]
    return bool(remaining <= 1)

In [5]:
# Files describing the environment that is loaded below.
environment_path = "Environment/MultiEnvs.xml"
info_path = "Environment/info_example.json"

# Names of the agents handed to the environment.
agents = ["agent1_torso", "agent2_torso", "agent3_torso"]

# Full configuration for MuJoCo_RL: file paths, agents, the reward and done
# callbacks defined earlier in this notebook, and rendering switched on.
config_dict = dict(
    xmlPath=environment_path,
    infoJson=info_path,
    agents=agents,
    rewardFunctions=[reward_function],
    doneFunctions=[done_function],
    renderMode=True,
)
environment = MuJoCo_RL(config_dict)

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


In [6]:
# Reset once, then step the environment with random actions until the user
# interrupts the kernel (KeyboardInterrupt ends the loop cleanly).
environment.reset()
try:
    while True:
        # Sample one random action per agent from that agent's own action space.
        action = {
            _agent_name: environment._action_space[_agent_name].sample()
            for _agent_name in ("agent3_torso", "agent2_torso", "agent1_torso")
        }
        # Pin action component 1 to the constant 1 for agents 3 and 2 only;
        # agent 1 keeps its fully random action.
        for _agent_name in ("agent3_torso", "agent2_torso"):
            action[_agent_name][1:2] = 1

        observations, current_rewards, terminations, truncations, infos = environment.step(action)

        # Start a new episode when the "__all__" entry of either the
        # terminations or the truncations dict is True.
        if terminations["__all__"] == True or truncations["__all__"] == True:
            environment.reset()
except KeyboardInterrupt:
    pass