In [4]:
import os
import shutil
import subprocess
import time

import gymnasium as gym
import imageio
import numpy as np
import torch
import tqdm
from stable_baselines3 import PPO

In [5]:
class CustomRewardWrapper(gym.Wrapper):
    def __init__(self, env):
        super(CustomRewardWrapper, self).__init__(env)

    def step(self, action):
        obs, reward, done, truncated, info = self.env.step(action)
        # Access relevant state and action variables
        
        #state
        z = obs[0]                  # Height of the robot
        a = obs[1]                  # Angle of the robot
        a_hip = obs[2]              # Angle of the hip
        a_knee = obs[3]             # Angle of the knee
        a_ankle = obs[4]            # Angle of the ankle
        v_x = obs[5]                # Velocity in x direction
        v_z = obs[6]                # Velocity in z direction
        a_d = obs[7]                # Angular velocity 
        a_hip_d = obs[8]            # Angular velocity of the hip
        a_knee_d = obs[9]           # Angular velocity of the knee
        a_ankle_d = obs[10]         # Angular velocity of the ankle
        
        #action
        torque_hip = action[0]      # Torque applied to the hip
        torque_knee = action[1]     # Torque applied to the knee
        torque_ankle = action[2]    # Torque applied to the ankle

        #vel_act = action[0] * obs[8] + obs[9] * action[1] + action[2] * obs[10]

        #different criteria for reward
        energy_used = np.sum(np.square(action))  # Simplistic energy calculation

        # Custom reward logic
        landing_reward = a_ankle_d + a_knee_d + a_hip_d

        backroll = -obs[7]
        height = obs[0]
        vel_act = - 2 * torque_hip * a_hip + torque_knee * a_knee + torque_ankle * a_ankle
        backslide = -obs[5]
        custom_reward = backroll * (1.0 + 10 * height + .6 * vel_act + .5 * backslide)
        if done:
            custom_reward -= 10  # Heavy penalty for falling

        return obs, custom_reward, done, truncated, info

In [6]:
healthy_reward = 1
healthy_z_range = (0.2, float("inf"))
healthy_angle_range = (-float("inf"), float("inf"))
reset_noise_scale = 5e-3
exclude_current_positions_from_observation = True

env = gym.make('Hopper-v4', render_mode='rgb_array', healthy_reward=healthy_reward, healthy_z_range=healthy_z_range, healthy_angle_range=healthy_angle_range, reset_noise_scale=reset_noise_scale, exclude_current_positions_from_observation=exclude_current_positions_from_observation)
env = CustomRewardWrapper(env)

model = PPO("MlpPolicy", env, verbose=1)

# Load the model
d = "2024-08-13_15-32-08"
#folder = "C:/Users/killi/Documents/code/Hopper-4-flip/Model"
folder = "/home/killian/Documents/code/hopper/Model"
file = folder + "/hopper_model_%s"%d
model.load(file, env=env)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


<stable_baselines3.ppo.ppo.PPO at 0x7ff1617df2c0>

In [7]:
vec_env = model.get_env()
obs = vec_env.reset()

# Action and observation mapping
mapping = {"z": 0, "a": 1, "a_hip": 2, "a_knee": 3, "a_ankle": 4, "v_x": 5, "v_z": 6, "a_d": 7, "a_hip_d": 8, "a_knee_d": 9, "a_ankle_d": 10}

N_step = 1000
s_a = np.zeros((N_step, 14))  # 11 states + 3 actions
rwd = np.zeros((N_step, 4))   # vel_act, get_straight, landing, flipping

for i in range(N_step):
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = vec_env.step(action)
    
    s_a[i, :] = np.concatenate((obs[0], action[0]))
    #rwd[i, :] = [env.vel_act, env.get_straight, env.landing, env.flipping]
    
    if done:
        N_stop = i
        print(f"Episode finished after {i+1} timesteps")
        break
        obs = vec_env.reset()

out.release()

# Truncate the arrays to only include the steps before termination
s_a = s_a[:N_stop, :]
rwd = rwd[:N_stop, :]

# Plotting function
def plot(map: str="z"):
    plt.figure()
    plt.plot(s_a[:, mapping[map]], label=map)
    plt.legend()
    plt.show()

# Plot all observations
for key in mapping.keys():
    plot(key)

# Plot rewards
plt.figure("reward")
plt.plot(rwd[:, 0], label="vel_act")
plt.plot(rwd[:, 1], label="get_straight")
plt.plot(rwd[:, 2], label="landing")
plt.plot(rwd[:, 3], label="flipping")
plt.legend()
plt.show()

  logger.warn(


AttributeError: 'HopperEnv' object has no attribute 'vel_act'