In [54]:
import os
import shutil
import subprocess
import time

import gymnasium as gym
import imageio
import numpy as np
import torch
import tqdm
from stable_baselines3 import PPO

In [55]:
class CustomRewardWrapper(gym.Wrapper):
    def __init__(self, env):
        super(CustomRewardWrapper, self).__init__(env)

    def step(self, action):
        obs, reward, done, truncated, info = self.env.step(action)
        # Access relevant state and action variables
        
        #state
        z = obs[0]                  # Height of the robot
        a = obs[1]                  # Angle of the robot
        a_hip = obs[2]              # Angle of the hip
        a_knee = obs[3]             # Angle of the knee
        a_ankle = obs[4]            # Angle of the ankle
        v_x = obs[5]                # Velocity in x direction
        v_z = obs[6]                # Velocity in z direction
        a_d = obs[7]                # Angular velocity 
        a_hip_d = obs[8]            # Angular velocity of the hip
        a_knee_d = obs[9]           # Angular velocity of the knee
        a_ankle_d = obs[10]         # Angular velocity of the ankle
        
        #action
        torque_hip = action[0]      # Torque applied to the hip
        torque_knee = action[1]     # Torque applied to the knee
        torque_ankle = action[2]    # Torque applied to the ankle

        #vel_act = action[0] * obs[8] + obs[9] * action[1] + action[2] * obs[10]

        #different criteria for reward
        energy_used = np.sum(np.square(action))  # Simplistic energy calculation

        # Custom reward logic
        landing_reward = a_ankle_d + a_knee_d + a_hip_d

        backroll = -obs[7]
        height = obs[0]
        vel_act = - 2 * torque_hip * a_hip + torque_knee * a_knee + torque_ankle * a_ankle
        backslide = -obs[5]
        custom_reward = backroll * (1.0 + 10 * height + .6 * vel_act + .5 * backslide)
        if done:
            custom_reward -= 10  # Heavy penalty for falling

        return obs, custom_reward, done, truncated, info

In [56]:
healthy_reward = 1
healthy_z_range = (0.2, float("inf"))
healthy_angle_range = (-float("inf"), float("inf"))
reset_noise_scale = 5e-3
exclude_current_positions_from_observation = True

env = gym.make('Hopper-v4', render_mode='rgb_array', healthy_reward=healthy_reward, healthy_z_range=healthy_z_range, healthy_angle_range=healthy_angle_range, reset_noise_scale=reset_noise_scale, exclude_current_positions_from_observation=exclude_current_positions_from_observation)
env = CustomRewardWrapper(env)

model = PPO("MlpPolicy", env, verbose=1)

# Load the model
d = "2024-08-11_16-27-28"
folder = "C:/Users/killi/Documents/code/Hopper-4-flip/Model"
file = folder + "/hopper_model_%s"%d
model.load(file, env=env)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


<stable_baselines3.ppo.ppo.PPO at 0x24aef554680>

In [57]:
vec_env = model.get_env()
obs = vec_env.reset()

writer = imageio.get_writer('hopper-flip.mp4', fps=50)

N_step = 100000
for i in range(N_step):
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = vec_env.step(action)
    writer.append_data(vec_env.render("rgb_array"))
    #VecEnv resets automatically
    #print(f"ankle_angle: {obs[0, 4]}")
    #print(f"hip_angle: {obs[0, 2]}")
    #print(f"sum of the ang: {np.pi/2+np.sum(obs[0, 2:5])}")
    #print(f"angle of torso: {obs[0, 1]}")
    if done:
      print("Episode finished after {} timesteps".format(i+1))
      break
      obs = vec_env.reset()

writer.close()

hip_angle: 0.0009259218994021833
hip_angle: 0.0007882474423158145
hip_angle: 0.0006055091170892165
hip_angle: 0.00037224475941982307
hip_angle: 8.395334711420804e-05
hip_angle: -0.00026316704336491184
hip_angle: -0.0006725356451134314
hip_angle: -0.0011473545209527269
hip_angle: -0.0016906892726869793
hip_angle: -0.0023055052844469163
hip_angle: -0.002977783856248609
hip_angle: -0.0036412303073826246
hip_angle: -0.004319671375272559
hip_angle: -0.005035774224104186
hip_angle: -0.005802928892725901
hip_angle: -0.006624209167090461
hip_angle: -0.007498719738660812
hip_angle: -0.008424023044348746
hip_angle: -0.009396840232748196
hip_angle: -0.010413596192808329
hip_angle: -0.011470823335310118
hip_angle: -0.012565436860008813
hip_angle: -0.013694898729800094
hip_angle: -0.014857291089989283
hip_angle: -0.016051320873079405
hip_angle: -0.017276276081911277
hip_angle: -0.018531951552429482
hip_angle: -0.019818558760717433
hip_angle: -0.021136630826507123
hip_angle: -0.022486930736687352
hi