In [5]:
import numpy as np
from gym.envs.box2d import BipedalWalker
import warnings

In [7]:
class WalkerAgent(BipedalWalker):
    """BipedalWalker variant whose ``step`` also returns behaviour features.

    The extra feature vector summarises what the walker did during the step
    (distance, hull angle magnitude, applied torque, joint angles/speeds) so
    callers can score behaviour in addition to reward.
    """

    def step(self, action):
        """Advance the simulation one step and compute behaviour features.

        Args:
            action: Sequence of per-joint torque commands passed unchanged to
                ``BipedalWalker.step``.

        Returns:
            A 5-tuple ``(state, reward, features, done, info)``. Note the
            non-standard order: ``features`` (a 12-element numpy array) is
            inserted between ``reward`` and ``done``.
        """
        state, reward, done, info = super().step(action)
        # State layout relied upon below:
        # Hull has angular velocity, x.velocity, and y.velocity, each
        # normalized by FPS.
        # Leg 0 Joint 0 has an angle and speed, normalized by SPEED_HIP.
        # Leg 0 Joint 1 has angle+1, and speed normalized by SPEED_KNEE.
        # Leg 0 Contact is boolean.
        # Leg 1 same as Leg 0.

        # Total commanded torque magnitude for this step, with each joint's
        # contribution clipped to [0, 1]. The accumulator was previously
        # never initialized, which raised UnboundLocalError on every call.
        total_torque = float(np.sum(np.clip(np.abs(action), 0, 1)))

        # NOTE(review): this feature is labelled "legs up, jump", but it is
        # truthy only when BOTH state[8] and state[13] are set, and those
        # entries are described above as leg contact flags. Confirm whether
        # the intended condition is the negation. Behaviour is left as-is.
        both_legs_flag = state[8] and state[13]

        features = np.array([
            self.hull.position[0],  # 0 distance traveled
            np.abs(state[0]),       # 1 head stability
            total_torque,           # 2 torque per step
            both_legs_flag,         # 3 legs up, jump

            np.abs(state[4]),       # 4 leg0 hip angle
            np.abs(state[5]),       # 5 leg0 hip speed
            np.abs(state[6]),       # 6 leg0 knee angle
            np.abs(state[7]),       # 7 leg0 knee speed

            np.abs(state[9]),       # 8 leg1 hip angle
            np.abs(state[10]),      # 9 leg1 hip speed
            np.abs(state[11]),      # 10 leg1 knee angle
            np.abs(state[12]),      # 11 leg1 knee speed
        ])
        return np.array(state), reward, features, done, info

In [9]:
# Now we will sample the environment output for a few agent actions.
def simulate(model, env, render_mode=False, num_episodes=5):
    """Run ``model`` in ``env`` for several episodes and average the results.

    For each episode the step rewards are summed and the 12 per-step feature
    values are averaged over the steps taken. Those per-episode values are
    then averaged across all episodes.

    Args:
        model: Object exposing ``get_action(obs)`` and, when ``env.reset()``
            returns ``None``, an ``input_size`` attribute used to build a
            zero observation.
        env: Environment whose ``step(action)`` returns
            ``(obs, reward, features, done, info)`` with 12 feature values.
        render_mode: When true, render every step and print a per-episode
            summary.
        num_episodes: Number of episodes to run; must be at least 1.

    Returns:
        dict mapping score names (``"meanAvgReward"``, ``"meanDistance"``,
        ...) to float averages.

    Raises:
        ValueError: If ``num_episodes`` is less than 1.
    """
    if num_episodes < 1:
        raise ValueError("num_episodes must be at least 1")

    max_episode_length = 3000
    # Order matches the feature vector produced by env.step.
    feature_names = (
        "meanDistance",
        "meanHeadStability",
        "meanTorquePerStep",
        "meanJump",
        "meanLeg0HipAngle",
        "meanLeg0HipSpeed",
        "meanLeg0KneeAngle",
        "meanLeg0KneeSpeed",
        "meanLeg1HipAngle",
        "meanLeg1HipSpeed",
        "meanLeg1KneeAngle",
        "meanLeg1KneeSpeed",
    )
    num_features = len(feature_names)

    episodes_reward_sum = 0.0
    episodes_feature_sum = np.zeros(num_features)

    for _ in range(num_episodes):
        obs = env.reset()
        if obs is None:
            obs = np.zeros(model.input_size)

        # Per-episode accumulators must start fresh every episode so that
        # one episode's totals do not leak into the next.
        total_reward = 0.0
        total_features = np.zeros(num_features)
        steps = 0

        for _ in range(max_episode_length):
            if render_mode:
                env.render("human")

            action = model.get_action(obs)
            obs, reward, features, done, _ = env.step(action)

            total_reward += reward
            total_features += np.asarray(features, dtype=float)
            steps += 1

            if done:
                break

        if render_mode:
            print("reward", total_reward, "timesteps", steps)

        episodes_reward_sum += total_reward
        # Divide by the number of steps actually taken (not the last loop
        # index), which is always >= 1 here, so no division by zero.
        episodes_feature_sum += total_features / steps

    episode_avg_features = episodes_feature_sum / num_episodes

    scores = {"meanAvgReward": float(episodes_reward_sum / num_episodes)}
    for name, value in zip(feature_names, episode_avg_features):
        scores[name] = float(value)
    return scores

In [10]:
def make_env(seed=-1):
    """Create a walker environment, optionally seeding it.

    Args:
        seed: Seed passed to ``env.seed`` when it is a non-negative number.
            A negative value (the default) or ``None`` leaves the
            environment unseeded.

    Returns:
        A new ``WalkerAgent`` environment instance.
    """
    # Warnings emitted while constructing the environment are suppressed.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # The original referenced ``QDBipedalWalker``, which is not defined
        # in this file; ``WalkerAgent`` is the environment class defined here.
        env = WalkerAgent()

    if seed is not None and seed >= 0:
        env.seed(seed)

    return env