In [1]:
import gymnasium as gym
from stable_baselines3 import SAC
import numpy as np
import os
from datetime import datetime

# Modifying the Environment using Action Wrappers

**What are Actions?**
- in the walker2D actions are torque values applied to the joints of the agent 
- define how strongly and in which direction the joints move

**Which modifications did we implement?**

**Action Scaler** 
- reduces the overall scale of the actions by 80% -> more controlled range
- resulting in the agent learning more stable walking patterns and produce more natrual and controlled movements 

**SmoothActionWrapper** 
- smoothens actions by blending the current action with the previous one, avoiding jerking movements 

**ClippedActionWrappers** 
- ensures the actions remain with a moderate range, preventing extreme torque values
- avoid actions that would destabilize the environment 

**JointScalerWrapper** 
- applies custom scaling factors for each joint to influce specific behaviors, such as limiting certain joints that contribute to jumping 
- finer control over specific joints

# Environment with 1M Timesteps

In [4]:
# Directories to save the model and logs
models_dir = "Models"
logdir = "logs_action"

# Create directories if they don't exist
if not os.path.exists(models_dir):
    os.makedirs(models_dir)

if not os.path.exists(logdir):
    os.makedirs(logdir)

class ActionScaler(gym.ActionWrapper):
    def __init__(self, env, scale=0.5):
        super().__init__(env)
        self.scale = scale

    def action(self, action):
        return self.scale * action  # Scale down actions

class SmoothActionWrapper(gym.ActionWrapper):
    def __init__(self, env, alpha=0.9):
        super().__init__(env)
        self.prev_action = np.zeros(env.action_space.shape)
        self.alpha = alpha

    def action(self, action):
        smoothed_action = self.alpha * self.prev_action + (1 - self.alpha) * action
        self.prev_action = smoothed_action
        return smoothed_action

class ClippedActionWrapper(gym.ActionWrapper):
    def action(self, action):
        return np.clip(action, -0.5, 0.5)  # Limit actions within a moderate range

class JointScalerWrapper(gym.ActionWrapper):
    def __init__(self, env, scale_factors):
        super().__init__(env)
        self.scale_factors = scale_factors  # Scale for each joint

    def action(self, action):
        return action * self.scale_factors


seed=42

# Create the environment
try:
    base_env = gym.make('Walker2d-v5')
    env = ActionScaler(base_env, scale=0.8)  # Moderate scaling for overall actions
    env = SmoothActionWrapper(base_env, alpha=0.8)  # Smoothing actions
    env = ClippedActionWrapper(base_env)  # Clipping extreme actions
    env = JointScalerWrapper(base_env, scale_factors=np.array([1.0, 0.7, 0.7, 1.0, 1.0, 0.7]))  # Joint-specific scaling
    env.reset(seed=seed)
    print("Environment successfully created and reset.")
except Exception as e:
    print(f"Failed to create environment: {e}")
    env = None

# Initialize the SAC  model if not already initialized
model_path = f"{models_dir}/Sac_action.zip"

# Check if the model path exists
if os.path.exists(model_path) and env is not None:
    try:
        model = SAC.load(model_path, env=env)
        print("Model loaded successfully.")
    except Exception as e:
        print(f"Failed to load model: {e}")
        model = SAC('MlpPolicy', env, verbose=1, tensorboard_log=logdir)
else:
    print("Model path does not exist or environment creation failed. Initializing new model.")
    model = SAC('MlpPolicy', env, verbose=1, tensorboard_log=logdir,seed=seed)

## Training parameters
TIMESTEPS = 1000000  
total_timesteps = 1 * TIMESTEPS  # Total training steps after 30 iterations


if env is not None:
    # Train the model
    model.learn(total_timesteps=TIMESTEPS, tb_log_name="log_action")
    #Save the model after the full training
    model.save(f"{models_dir}/action_model")

    env.close()
else:
    print("Skipping training as the environment was not created successfully.")

Environment successfully created and reset.
Model path does not exist or environment creation failed. Initializing new model.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to logs_action/log_action_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 23.5     |
|    ep_rew_mean     | 3.83     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 3677     |
|    time_elapsed    | 0        |
|    total_timesteps | 94       |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24.8     |
|    ep_rew_mean     | 4.62     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 217      |
|    time_elapsed    | 0        |
|    total_timesteps | 198      |
| train/             |          |
|    actor_loss      | -8.61    |
|    critic_loss     | 2.84   

In [5]:
#evaluate model created with action wrappers 
import numpy as np
import json
import gymnasium as gym
from datetime import datetime
import os
from stable_baselines3 import SAC
from typing import Optional, Type

def evaluate_walker_model(
    model_path: str,
    num_episodes: int = 30,
    seed: Optional[int] = None
) -> dict:
    """
    Evaluate a pre-trained Walker2d model focusing on distance achieved and velocity maintained.
    
    Args:
        model_path: Path to the saved model
        num_episodes: Number of evaluation episodes
        seed: Random seed for evaluation
    
    Returns:
        Dictionary containing evaluation metrics
    """
    # Create evaluation environment
    eval_env = gym.make('Walker2d-v5')
    if seed is not None:
        eval_env.reset(seed=seed)
    
    # Load the trained model
    model = SAC.load(model_path)
    
    # Initialize storage for episode data
    episode_data = []
    
    for ep in range(num_episodes):
        obs, _ = eval_env.reset()
        done = False
        ep_data = {
            'episode_length': 0,
            'velocities': [],
            'terminated': False,
            'truncated': False
        }
        
        while not done:
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = eval_env.step(action)
            done = terminated or truncated
            
            # Update episode data
            ep_data['episode_length'] += 1
            ep_data['velocities'].append(float(info.get('x_velocity', 0)))
            if done:  # Store final position when episode ends
                ep_data['final_position'] = float(info.get('x_position', 0))
            ep_data['terminated'] = terminated
            ep_data['truncated'] = truncated
        
        # Calculate episode statistics
        avg_velocity = np.mean(ep_data['velocities'])
        ep_stats = {
            'episode': ep + 1,
            'episode_length': ep_data['episode_length'],
            'distance': ep_data['final_position'],
            'avg_velocity': avg_velocity,
            'reached_max_steps': ep_data['truncated']
        }
        
        episode_data.append(ep_stats)
        print(f"Episode {ep + 1}: "
              f"Length = {ep_stats['episode_length']}, "
              f"Position = {ep_stats['distance']:.2f}, "  
              f"Avg Velocity = {ep_stats['avg_velocity']:.2f}, "
              f"{'Completed' if ep_stats['reached_max_steps'] else 'Terminated'}")
    
    # Calculate overall statistics
    completion_rate = sum(1 for ep in episode_data if ep['reached_max_steps']) / num_episodes
    avg_distance = np.mean([ep['distance'] for ep in episode_data])
    std_distance = np.std([ep['distance'] for ep in episode_data])
    avg_velocity = np.mean([ep['avg_velocity'] for ep in episode_data])
    std_velocity = np.std([ep['avg_velocity'] for ep in episode_data])
    
    # Compile results
    results = {
        'completion_rate': float(completion_rate),
        'average_position': float(avg_distance),
        'position_std': float(std_distance),
        'average_velocity': float(avg_velocity),
        'velocity_std': float(std_velocity),
        'num_episodes': num_episodes,
        'evaluation_seed': seed,
        'timestamp': datetime.now().strftime("%Y%m%d_%H%M%S"),
        'episode_details': episode_data
    }
    
    # Print summary
    print("\nEvaluation Results:")
    print(f"Completion Rate: {completion_rate*100:.1f}%")
    print(f"Average Position: {avg_distance:.2f} ± {std_distance:.2f}")
    print(f"Average Velocity: {avg_velocity:.2f} ± {std_velocity:.2f}")
    
    # Save results
    os.makedirs("evaluation_results", exist_ok=True)
    results_path = f"evaluation_results/eval_{results['timestamp']}.json"
    with open(results_path, 'w') as f:
        json.dump(results, f, indent=4)
    
    eval_env.close()
    return results

In [10]:
results = evaluate_walker_model(
    model_path="Models/action_model.zip",
    num_episodes=100,
    seed=42  
)

Episode 1: Length = 1000, Position = 26.07, Avg Velocity = 3.26, Completed
Episode 2: Length = 1000, Position = 25.81, Avg Velocity = 3.23, Completed
Episode 3: Length = 1000, Position = 25.64, Avg Velocity = 3.20, Completed
Episode 4: Length = 1000, Position = 25.97, Avg Velocity = 3.25, Completed
Episode 5: Length = 1000, Position = 26.47, Avg Velocity = 3.31, Completed
Episode 6: Length = 1000, Position = 25.94, Avg Velocity = 3.24, Completed
Episode 7: Length = 1000, Position = 25.01, Avg Velocity = 3.13, Completed
Episode 8: Length = 1000, Position = 26.31, Avg Velocity = 3.29, Completed
Episode 9: Length = 1000, Position = 26.67, Avg Velocity = 3.33, Completed
Episode 10: Length = 1000, Position = 26.29, Avg Velocity = 3.29, Completed
Episode 11: Length = 1000, Position = 25.95, Avg Velocity = 3.24, Completed
Episode 12: Length = 1000, Position = 25.55, Avg Velocity = 3.19, Completed
Episode 13: Length = 1000, Position = 26.43, Avg Velocity = 3.30, Completed
Episode 14: Length = 

In [15]:
import gymnasium as gym
from stable_baselines3 import SAC

# Load the environment with rendering enabled
env = gym.make('Walker2d-v5', render_mode="human") #With visualization
#env = gym.make('Walker2d-v5') #Without vizualization

# Path to the saved SAC model
model_path = "Models/action_model.zip" #  1 000 000 Timesteps



# Load the trained SAC model
model = SAC.load(model_path)

# Visualize for 30 episodes
for episode in range(30):
    obs, info = env.reset()
    done = False

    while not done:
        # Predict the action using the trained SAC model
        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, info = env.step(action)
        

        # Check if the episode has ended
        done = terminated or truncated

    # Step through the environment
    
    print(f"Episode {episode}: Terminated={terminated}, Truncated={truncated}, Info={info}")

# Close the environment after visualization
env.close()

Episode 0: Terminated=False, Truncated=True, Info={'x_position': 26.441174970542075, 'z_distance_from_origin': 0.1382164139978035, 'x_velocity': 4.1956767005157936, 'reward_forward': 4.1956767005157936, 'reward_ctrl': -0.003815241098403931, 'reward_survive': 1.0}
Episode 1: Terminated=False, Truncated=True, Info={'x_position': 26.7759192652565, 'z_distance_from_origin': -0.0223938805802395, 'x_velocity': 3.459583395450494, 'reward_forward': 3.459583395450494, 'reward_ctrl': -0.003067830562591553, 'reward_survive': 1.0}
Episode 2: Terminated=False, Truncated=True, Info={'x_position': 26.29446161099367, 'z_distance_from_origin': -0.08377487355745483, 'x_velocity': 3.6419670137850524, 'reward_forward': 3.6419670137850524, 'reward_ctrl': -0.0033849701881408694, 'reward_survive': 1.0}
Episode 3: Terminated=False, Truncated=True, Info={'x_position': 25.77429716545743, 'z_distance_from_origin': 0.06442728335947367, 'x_velocity': 3.778637527853679, 'reward_forward': 3.778637527853679, 'reward_

# Timestep 5M 


In [None]:
# Directories to save the model and logs
models_dir = "Models"
logdir = "logs_action"

# Create directories if they don't exist
if not os.path.exists(models_dir):
    os.makedirs(models_dir)

if not os.path.exists(logdir):
    os.makedirs(logdir)

class ActionScaler(gym.ActionWrapper):
    def __init__(self, env, scale=0.5):
        super().__init__(env)
        self.scale = scale

    def action(self, action):
        return self.scale * action  # Scale down actions

class SmoothActionWrapper(gym.ActionWrapper):
    def __init__(self, env, alpha=0.9):
        super().__init__(env)
        self.prev_action = np.zeros(env.action_space.shape)
        self.alpha = alpha

    def action(self, action):
        smoothed_action = self.alpha * self.prev_action + (1 - self.alpha) * action
        self.prev_action = smoothed_action
        return smoothed_action

class ClippedActionWrapper(gym.ActionWrapper):
    def action(self, action):
        return np.clip(action, -0.5, 0.5)  # Limit actions within a moderate range

class JointScalerWrapper(gym.ActionWrapper):
    def __init__(self, env, scale_factors):
        super().__init__(env)
        self.scale_factors = scale_factors  # Scale for each joint

    def action(self, action):
        return action * self.scale_factors


seed=42

# Create the environment
try:
    base_env = gym.make('Walker2d-v5')
    env = ActionScaler(base_env, scale=0.8)  # Moderate scaling for overall actions
    env = SmoothActionWrapper(base_env, alpha=0.8)  # Smoothing actions
    env = ClippedActionWrapper(base_env)  # Clipping extreme actions
    env = JointScalerWrapper(base_env, scale_factors=np.array([1.0, 0.7, 0.7, 1.0, 1.0, 0.7]))  # Joint-specific scaling
    env.reset(seed=seed)
    print("Environment successfully created and reset.")
except Exception as e:
    print(f"Failed to create environment: {e}")
    env = None

# Initialize the SAC  model if not already initialized
model_path = f"{models_dir}/Sac_action.zip"

# Check if the model path exists
if os.path.exists(model_path) and env is not None:
    try:
        model = SAC.load(model_path, env=env)
        print("Model loaded successfully.")
    except Exception as e:
        print(f"Failed to load model: {e}")
        model = SAC('MlpPolicy', env, verbose=1, tensorboard_log=logdir)
else:
    print("Model path does not exist or environment creation failed. Initializing new model.")
    model = SAC('MlpPolicy', env, verbose=1, tensorboard_log=logdir,seed=seed)

## Training parameters
TIMESTEPS = 5000000  
total_timesteps = 1 * TIMESTEPS  # Total training steps after 30 iterations


if env is not None:
    # Train the model
    model.learn(total_timesteps=TIMESTEPS, tb_log_name="log_action")
    #Save the model after the full training
    model.save(f"{models_dir}/action_model")

    env.close()
else:
    print("Skipping training as the environment was not created successfully.")

Environment successfully created and reset.
Model path does not exist or environment creation failed. Initializing new model.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to logs_action/log_action_lr_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 23.5     |
|    ep_rew_mean     | 3.83     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 4228     |
|    time_elapsed    | 0        |
|    total_timesteps | 94       |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24.8     |
|    ep_rew_mean     | 4.63     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 269      |
|    time_elapsed    | 0        |
|    total_timesteps | 198      |
| train/             |          |
|    actor_loss      | -8.61    |
|    critic_loss     | 2.84

KeyboardInterrupt: 

In [17]:
results = evaluate_walker_model(
    model_path="Models/action_model_5M.zip",
    num_episodes=100,
    seed=42  
)

Episode 1: Length = 498, Position = 14.50, Avg Velocity = 3.64, Terminated
Episode 2: Length = 383, Position = 11.10, Avg Velocity = 3.62, Terminated
Episode 3: Length = 1000, Position = 30.76, Avg Velocity = 3.84, Completed
Episode 4: Length = 992, Position = 29.46, Avg Velocity = 3.71, Terminated
Episode 5: Length = 672, Position = 20.18, Avg Velocity = 3.76, Terminated
Episode 6: Length = 597, Position = 18.10, Avg Velocity = 3.79, Terminated
Episode 7: Length = 560, Position = 16.04, Avg Velocity = 3.58, Terminated
Episode 8: Length = 640, Position = 19.89, Avg Velocity = 3.88, Terminated
Episode 9: Length = 769, Position = 23.26, Avg Velocity = 3.78, Terminated
Episode 10: Length = 396, Position = 12.22, Avg Velocity = 3.86, Terminated
Episode 11: Length = 509, Position = 14.78, Avg Velocity = 3.63, Terminated
Episode 12: Length = 1000, Position = 31.25, Avg Velocity = 3.91, Completed
Episode 13: Length = 508, Position = 15.04, Avg Velocity = 3.70, Terminated
Episode 14: Length = 

In [19]:
import gymnasium as gym
from stable_baselines3 import SAC

# Load the environment with rendering enabled
env = gym.make('Walker2d-v5', render_mode="human") #With visualization
#env = gym.make('Walker2d-v5') #Without vizualization

# Path to the saved SAC model
model_path = "Models/action_model_5M.zip" #  1 000 000 Timesteps



# Load the trained SAC model
model = SAC.load(model_path)

# Visualize for 30 episodes
for episode in range(30):
    obs, info = env.reset()
    done = False

    while not done:
        # Predict the action using the trained SAC model
        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, info = env.step(action)
        

        # Check if the episode has ended
        done = terminated or truncated

    # Step through the environment
    
    print(f"Episode {episode}: Terminated={terminated}, Truncated={truncated}, Info={info}")

# Close the environment after visualization
env.close()

Episode 0: Terminated=True, Truncated=False, Info={'x_position': 14.93922218576242, 'z_distance_from_origin': -0.45197897320272795, 'x_velocity': 0.09623708391548469, 'reward_forward': 0.09623708391548469, 'reward_ctrl': -0.004939473152160645, 'reward_survive': 0.0}
Episode 1: Terminated=True, Truncated=False, Info={'x_position': 8.901989825340813, 'z_distance_from_origin': -0.4594776486849125, 'x_velocity': 1.6510976723393966, 'reward_forward': 1.6510976723393966, 'reward_ctrl': -0.005294286727905274, 'reward_survive': 0.0}
Episode 2: Terminated=True, Truncated=False, Info={'x_position': 29.79922388337344, 'z_distance_from_origin': -0.459562865265813, 'x_velocity': 3.185406676682767, 'reward_forward': 3.185406676682767, 'reward_ctrl': -0.004816721916198731, 'reward_survive': 0.0}
Episode 3: Terminated=True, Truncated=False, Info={'x_position': 11.455364692189283, 'z_distance_from_origin': -0.4588791871292496, 'x_velocity': 5.825166887738487, 'reward_forward': 5.825166887738487, 'rewar

# Timestep 5M using Learning Rate

In [23]:
def linear_schedule(initial_value):
    """
    Linear learning rate schedule.
    :param initial_value: Initial learning rate (e.g., 3e-4).
    :return: A callable function that computes the current learning rate based on progress_remaining.
    """
    def schedule(progress_remaining):
        """
        Calculates the scaled learning rate.
        :param progress_remaining: Float between 1.0 (start of training) and 0.0 (end of training).
        :return: Current learning rate.
        """
        return progress_remaining * initial_value
    return schedule


In [24]:
# Directories to save the model and logs
models_dir = "Models"
logdir = "logs_action"

# Create directories if they don't exist
if not os.path.exists(models_dir):
    os.makedirs(models_dir)

if not os.path.exists(logdir):
    os.makedirs(logdir)

class ActionScaler(gym.ActionWrapper):
    def __init__(self, env, scale=0.5):
        super().__init__(env)
        self.scale = scale

    def action(self, action):
        return self.scale * action  # Scale down actions

class SmoothActionWrapper(gym.ActionWrapper):
    def __init__(self, env, alpha=0.9):
        super().__init__(env)
        self.prev_action = np.zeros(env.action_space.shape)
        self.alpha = alpha

    def action(self, action):
        smoothed_action = self.alpha * self.prev_action + (1 - self.alpha) * action
        self.prev_action = smoothed_action
        return smoothed_action

class ClippedActionWrapper(gym.ActionWrapper):
    def action(self, action):
        return np.clip(action, -0.5, 0.5)  # Limit actions within a moderate range

class JointScalerWrapper(gym.ActionWrapper):
    def __init__(self, env, scale_factors):
        super().__init__(env)
        self.scale_factors = scale_factors  # Scale for each joint

    def action(self, action):
        return action * self.scale_factors


seed=42

# Create the environment
try:
    base_env = gym.make('Walker2d-v5')
    env = ActionScaler(base_env, scale=0.8)  # Moderate scaling for overall actions
    env = SmoothActionWrapper(base_env, alpha=0.8)  # Smoothing actions
    env = ClippedActionWrapper(base_env)  # Clipping extreme actions
    env = JointScalerWrapper(base_env, scale_factors=np.array([1.0, 0.7, 0.7, 1.0, 1.0, 0.7]))  # Joint-specific scaling
    env.reset(seed=seed)
    print("Environment successfully created and reset.")
except Exception as e:
    print(f"Failed to create environment: {e}")
    env = None

# Initialize the SAC  model if not already initialized
model_path = f"{models_dir}/Sac_action.zip"

# Check if the model path exists
initial_lr = 3e-4 
if os.path.exists(model_path) and env is not None:
    try:
        model = SAC.load(model_path, env=env)
        print("Model loaded successfully.")
    except Exception as e:
        print(f"Failed to load model: {e}")
        model = SAC('MlpPolicy', env, verbose=1, tensorboard_log=logdir, learning_rate=linear_schedule(initial_lr))
else:
    print("Model path does not exist or environment creation failed. Initializing new model.")
    model = SAC('MlpPolicy', env, verbose=1, tensorboard_log=logdir,seed=seed,learning_rate=linear_schedule(initial_lr))

## Training parameters
TIMESTEPS = 5000000  
total_timesteps = 1 * TIMESTEPS  # Total training steps after 30 iterations


if env is not None:
    # Train the model
    model.learn(total_timesteps=TIMESTEPS, tb_log_name="log_action_lr")
    #Save the model after the full training
    model.save(f"{models_dir}/action_model_5M_lr")

    env.close()
else:
    print("Skipping training as the environment was not created successfully.")

Environment successfully created and reset.
Model path does not exist or environment creation failed. Initializing new model.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to logs_action/log_action_lr_2
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 23.5     |
|    ep_rew_mean     | 3.83     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 4722     |
|    time_elapsed    | 0        |
|    total_timesteps | 94       |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 24.8     |
|    ep_rew_mean     | 4.63     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 219      |
|    time_elapsed    | 0        |
|    total_timesteps | 198      |
| train/             |          |
|    actor_loss      | -8.61    |
|    critic_loss     | 2.84

In [None]:
results = evaluate_walker_model(
    model_path="Models/action_model_5M_lr.zip",
    num_episodes=100,
    seed=42  
)

Episode 1: Length = 1000, Position = 26.61, Avg Velocity = 3.33, Completed
Episode 2: Length = 1000, Position = 25.25, Avg Velocity = 3.16, Completed
Episode 3: Length = 1000, Position = 25.12, Avg Velocity = 3.14, Completed
Episode 4: Length = 1000, Position = 25.83, Avg Velocity = 3.23, Completed
Episode 5: Length = 1000, Position = 24.76, Avg Velocity = 3.10, Completed
Episode 6: Length = 1000, Position = 25.53, Avg Velocity = 3.19, Completed
Episode 7: Length = 1000, Position = 25.01, Avg Velocity = 3.13, Completed
Episode 8: Length = 1000, Position = 27.30, Avg Velocity = 3.41, Completed
Episode 9: Length = 1000, Position = 27.50, Avg Velocity = 3.44, Completed
Episode 10: Length = 1000, Position = 26.74, Avg Velocity = 3.34, Completed
Episode 11: Length = 1000, Position = 25.76, Avg Velocity = 3.22, Completed
Episode 12: Length = 1000, Position = 25.94, Avg Velocity = 3.24, Completed
Episode 13: Length = 1000, Position = 25.61, Avg Velocity = 3.20, Completed
Episode 14: Length = 

In [26]:
import gymnasium as gym
from stable_baselines3 import SAC

# Load the environment with rendering enabled
env = gym.make('Walker2d-v5', render_mode="human") #With visualization
#env = gym.make('Walker2d-v5') #Without vizualization

# Path to the saved SAC model
model_path = "Models/action_model_5M_lr.zip" #  1 000 000 Timesteps



# Load the trained SAC model
model = SAC.load(model_path)

# Visualize for 30 episodes
for episode in range(30):
    obs, info = env.reset()
    done = False

    while not done:
        # Predict the action using the trained SAC model
        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, info = env.step(action)
        

        # Check if the episode has ended
        done = terminated or truncated

    # Step through the environment
    
    print(f"Episode {episode}: Terminated={terminated}, Truncated={truncated}, Info={info}")

# Close the environment after visualization
env.close()

Episode 0: Terminated=False, Truncated=True, Info={'x_position': 29.36636194782821, 'z_distance_from_origin': -0.06590854786255695, 'x_velocity': 3.65632408972294, 'reward_forward': 3.65632408972294, 'reward_ctrl': -0.004136053562164307, 'reward_survive': 1.0}
Episode 1: Terminated=False, Truncated=True, Info={'x_position': 27.837827717554767, 'z_distance_from_origin': -0.16392169877378948, 'x_velocity': 2.7663737559215917, 'reward_forward': 2.7663737559215917, 'reward_ctrl': -0.003016355037689209, 'reward_survive': 1.0}
Episode 2: Terminated=False, Truncated=True, Info={'x_position': 27.440241657163543, 'z_distance_from_origin': -0.08393274716756927, 'x_velocity': 3.329074783372299, 'reward_forward': 3.329074783372299, 'reward_ctrl': -0.004014641761779785, 'reward_survive': 1.0}
Episode 3: Terminated=False, Truncated=True, Info={'x_position': 26.007421298855796, 'z_distance_from_origin': -0.0200705238005503, 'x_velocity': 3.0476161356660647, 'reward_forward': 3.0476161356660647, 'rewa