In [None]:
# Importing necessary modules

from torch.distributions import MultivariateNormal
import matplotlib.pyplot as plt
from collections import deque
import torch.optim as optim
import gymnasium as gym
import torch.nn as nn
from tqdm import tqdm
import numpy as np
import imageio
import torch

In [None]:
class Actor(nn.Module):
    """Policy network producing the mean of a Gaussian with a fixed diagonal covariance."""

    def __init__(self, state_dim, action_dim, action_std):
        """
        Initializes the Actor network.

        Args:
        - state_dim  : Dimension of the input state.
        - action_dim : Dimension of the output action.
        - action_std : Standard deviation for the action distribution (controls exploration).
        """
        super().__init__()

        # Two hidden layers of 64 units; the last layer outputs one mean per action dimension
        self.fc = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, action_dim),
        )
        self.action_std = action_std

    def forward(self, state):
        """
        Forward pass of the Actor network.

        Args:
        - state: Input state tensor whose last dimension is state_dim.

        Returns:
        - action_mean : Output of the network for the given state(s).
        - cov_matrix  : Diagonal covariance of shape (1 x action_dim x action_dim) with
                        action_std**2 on the diagonal. It does not depend on the state; the
                        leading dimension of 1 lets it broadcast against a batch of means.
        """
        action_mean = self.fc(state)

        # Build the variances with the same dtype/device as the mean so the distribution
        # built from (action_mean, cov_matrix) does not mix dtypes or devices when the
        # module has been cast (e.g. .double()) or moved off the CPU.
        variances = torch.full(
            (action_mean.size(-1),),
            self.action_std ** 2,
            dtype=action_mean.dtype,
            device=action_mean.device,
        )
        cov_matrix = torch.diag(variances).unsqueeze(0)
        return action_mean, cov_matrix


class Critic(nn.Module):
    """Value network: maps a state to a single scalar estimate."""

    def __init__(self, state_dim):
        """
        Builds the value network.

        Args:
        - state_dim: Dimension of the input state.
        """
        super().__init__()

        hidden_units = 64
        # Layers are created in input-to-output order and wrapped in one Sequential
        layers = [
            nn.Linear(state_dim, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 1),
        ]
        self.fc = nn.Sequential(*layers)

    def forward(self, state):
        """
        Runs the value network on the given state(s).

        Args:
        - state: Input state tensor whose last dimension is state_dim.

        Returns:
        - Tensor of value estimates with a trailing dimension of size 1.
        """
        value = self.fc(state)
        return value

In [None]:
class PPO:
    """Bundles the actor, the critic, their optimizers and the PPO hyperparameters."""

    def __init__(self, state_dim, action_dim, action_std, lr, gamma, eps_clip):
        """
        Builds the agent.

        Args:
        - state_dim  : Dimension of the input state.
        - action_dim : Dimension of the output action.
        - action_std : Standard deviation of the action distribution.
        - lr         : Learning rate shared by both Adam optimizers.
        - gamma      : Discount factor for rewards.
        - eps_clip   : Clipping parameter for PPO (used in the policy update).
        """
        # Hyperparameters kept on the agent for later use
        self.gamma      = gamma
        self.eps_clip   = eps_clip
        self.action_std = action_std

        # Networks: the policy (actor) is created before the value function (critic)
        self.actor  = Actor(state_dim, action_dim, action_std)
        self.critic = Critic(state_dim)

        # One Adam optimizer per network, both using the same learning rate
        self.actor_optimizer  = optim.Adam(self.actor.parameters(), lr=lr)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=lr)

    def select_action(self, state):
        """
        Samples an action for a single state from the current policy.

        Args:
        - state: The current state (anything torch.FloatTensor accepts).

        Returns:
        - action: The sampled action as a numpy array, with the batch dimension removed.
        - action_logprob: Log probability of the sampled action as a tensor
                          (the batch dimension is kept).
        """
        # A leading batch dimension is added because the actor is fed batched input
        batched_state = torch.FloatTensor(state).unsqueeze(0)

        # No gradients are needed while interacting with the environment
        with torch.no_grad():
            mean, covariance = self.actor(batched_state)
            policy  = MultivariateNormal(mean, covariance)
            sampled = policy.sample()
            sampled_logprob = policy.log_prob(sampled)

        return sampled.squeeze(0).numpy(), sampled_logprob

    def evaluate(self, states, actions):
        """
        Scores a batch of (state, action) pairs under the current networks.

        Args:
        - states  : A batch of input states.
        - actions : The actions taken in those states.

        Returns:
        - action_logprobs : Log probabilities of the given actions under the current policy.
        - state_values    : Critic output for the given states.
        - dist_entropy    : Entropy of the action distribution.
        """
        mean, covariance = self.actor(states)
        policy = MultivariateNormal(mean, covariance)

        # Tuple order is (log-probs, values, entropy), as callers unpack it
        return policy.log_prob(actions), self.critic(states), policy.entropy()


In [None]:
def collect_rollout(env, agent, timesteps):
    """
    Collects a batch of environment interactions (rollout) using the current policy.

    The environment is reset once at the start of the rollout and again whenever an
    episode ends, whether by termination or by truncation (e.g. a time limit).

    Args:
    - env       : Environment whose reset() returns (observation, info) and whose step()
                  returns (observation, reward, terminated, truncated, info)
    - agent     : Object exposing select_action(state) -> (action, logprob)
    - timesteps : Number of timesteps to collect per rollout

    Returns:
    - states   : List of observed states (the state each action was chosen in)
    - actions  : List of actions taken
    - rewards  : List of rewards received
    - logprobs : List of log probabilities of chosen actions
    """
    state = env.reset()[0]
    states, actions, rewards, logprobs = [], [], [], []

    for _ in range(timesteps):
        # Select action using current policy
        action, logprob = agent.select_action(state)

        # Take action in environment
        next_state, reward, terminated, truncated, _ = env.step(action)

        # Store transition data
        states.append(state)
        actions.append(action)
        rewards.append(reward)
        logprobs.append(logprob)

        # An episode is over on either flag; ignoring `truncated` would keep stepping
        # an environment that only ever ends through its time limit.
        if terminated or truncated:
            state = env.reset()[0]
        else:
            state = next_state

    return states, actions, rewards, logprobs

In [None]:
def compute_returns(rewards, gamma):
    """
    Computes discounted returns (cumulative future rewards) for each timestep.

    The whole reward sequence is treated as one trajectory: the running sum is not
    reset anywhere inside the sequence.

    Args:
    - rewards : Sequence of immediate rewards from the environment
    - gamma   : Discount factor for future rewards

    Returns:
    - returns : List of discounted returns, one per timestep, in the same order as rewards
                (empty when rewards is empty)
    """
    returns = []
    discounted_sum = 0
    # Walk backwards so each return reuses the one that follows it
    for r in reversed(rewards):
        discounted_sum = r + gamma * discounted_sum
        returns.append(discounted_sum)
    # Appending and reversing once is linear; inserting at index 0 each step is quadratic
    returns.reverse()
    return returns

In [None]:
def update(agent, states, actions, rewards, logprobs, gamma, eps_clip,
           epochs=10, value_coef=0.5, entropy_coef=0.01):
    """
    Updates the policy and value networks using the PPO clipped objective.

    Args:
    - agent        : Object exposing evaluate(states, actions), actor_optimizer and
                     critic_optimizer
    - states       : Sequence of observed states
    - actions      : Sequence of actions taken
    - rewards      : Sequence of raw per-step rewards; discounted returns are computed
                     here with compute_returns(rewards, gamma)
    - logprobs     : Log probabilities of the actions at collection time; each entry
                     must be convertible with float()
    - gamma        : Discount factor for rewards
    - eps_clip     : PPO clipping parameter
    - epochs       : Number of gradient steps performed on this batch
    - value_coef   : Weight of the value (MSE) loss in the combined loss
    - entropy_coef : Weight of the entropy bonus in the combined loss
    """
    # Nothing to learn from an empty rollout
    if len(rewards) == 0:
        return

    # Stack into a single ndarray first: building a tensor directly from a list of
    # ndarrays goes through a slow element-by-element path.
    states       = torch.as_tensor(np.asarray(states), dtype=torch.float32)
    actions      = torch.as_tensor(np.asarray(actions), dtype=torch.float32)
    returns      = torch.as_tensor(np.asarray(compute_returns(rewards, gamma)), dtype=torch.float32)
    # float() turns each stored log-prob into a scalar, giving one entry per step
    old_logprobs = torch.as_tensor([float(lp) for lp in logprobs], dtype=torch.float32)

    value_criterion = nn.MSELoss()

    # Perform multiple epochs of PPO updates on the same batch
    for _ in range(epochs):
        # Evaluate current policy and value estimates
        new_logprobs, state_values, dist_entropy = agent.evaluate(states, actions)

        # Drop only the trailing value dimension so a batch of one stays 1-D
        values = state_values.squeeze(-1)

        # Advantage = return minus the (detached) value estimate, then normalized
        advantages = returns - values.detach()
        advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

        # Clipped surrogate objective
        ratios      = torch.exp(new_logprobs - old_logprobs)
        surr1       = ratios * advantages
        surr2       = torch.clamp(ratios, 1 - eps_clip, 1 + eps_clip) * advantages
        policy_loss = -torch.min(surr1, surr2).mean()

        # Value loss and entropy bonus are folded into a single scalar loss
        value_loss = value_criterion(values, returns)
        loss       = policy_loss + value_coef * value_loss - entropy_coef * dist_entropy.mean()

        # One backward pass feeds both optimizers
        agent.actor_optimizer.zero_grad()
        agent.critic_optimizer.zero_grad()
        loss.backward()
        agent.actor_optimizer.step()
        agent.critic_optimizer.step()

In [None]:
def _record_demo_video(env, agent, video_filename, max_steps=500, frame_stride=2, fps=30):
    """Rolls out one episode with `agent` and writes every `frame_stride`-th frame to a video."""
    frames = []
    state  = env.reset()[0]
    done   = False
    steps  = 0

    while not done and steps < max_steps:
        action, _ = agent.select_action(state)
        state, _, terminated, truncated, _ = env.step(action)
        # Either flag ends the episode
        done = terminated or truncated
        if steps % frame_stride == 0:  # Skip frames to reduce video size
            frames.append(env.render())
        steps += 1

    imageio.mimsave(video_filename, frames, fps=fps)


def run_single_trial(trial_num, env_name, num_trials=5):
    """
    Runs a single training trial of the PPO algorithm.

    Args:
    - trial_num  : Zero-based trial index; also used as the random seed
    - env_name   : Name of the gymnasium environment to use
    - num_trials : Total number of trials, used only in the progress banner

    Returns:
    - trial_rewards : List with the summed reward of each collected rollout
    - ppo           : Trained PPO agent
    """
    print(f"\nStarting Trial {trial_num + 1}/{num_trials}")

    env = gym.make(env_name, render_mode="rgb_array")
    try:
        # Dimensions are read from the environment's spaces
        state_dim  = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_std = 0.5  # Standard deviation for exploration

        # Set random seeds for reproducibility; the environment has its own generator,
        # which is seeded through reset(seed=...)
        torch.manual_seed(trial_num)
        np.random.seed(trial_num)
        env.reset(seed=trial_num)

        # Initialize PPO agent and training parameters
        gamma                 = 0.99
        eps_clip              = 0.2
        ppo                   = PPO(state_dim, action_dim, action_std, lr=3e-4, gamma=gamma, eps_clip=eps_clip)
        episodes              = 1000
        timesteps_per_rollout = 2048

        # Initialize tracking variables
        trial_rewards = []
        avg_rewards   = deque(maxlen=100)

        # Main training loop: each iteration collects one rollout and updates on it
        for episode in tqdm(range(episodes), desc=f"Trial {trial_num + 1}"):
            states, actions, rewards, logprobs = collect_rollout(env, ppo, timesteps_per_rollout)
            update(ppo, states, actions, rewards, logprobs, gamma=gamma, eps_clip=eps_clip)

            # Track the total reward of this rollout
            episode_reward = sum(rewards)
            trial_rewards.append(episode_reward)
            avg_rewards.append(episode_reward)

            # Print progress every 10 episodes
            if episode % 10 == 0:
                avg_reward = np.mean(list(avg_rewards))
                print(f"\nTrial {trial_num + 1}, Episode {episode}, Average Reward: {avg_reward:.2f}")

        # Recording is best-effort: a failure here must not discard the training results
        video_filename = f"ppo_halfcheetah_trial_{trial_num + 1}.mp4"
        try:
            _record_demo_video(env, ppo, video_filename)
            print(f"Video saved as '{video_filename}'")
        except Exception as e:
            print(f"Warning: Could not save video for trial {trial_num + 1}: {e}")
    finally:
        # Release the environment even when training raises
        env.close()

    return trial_rewards, ppo

In [None]:
def plot_rewards_with_trials(all_trial_rewards, window=100):
    """
    Draws the per-trial reward curves, their smoothed mean and a standard-deviation band,
    then saves the figure to 'ppo_training_progress.png'.

    Args:
    - all_trial_rewards : One reward sequence per trial (stacked into a single array)
    - window            : Number of trailing points averaged when smoothing the mean
    """
    plt.figure(figsize=(10, 6))

    # Statistics across trials, computed along axis 0 of the stacked array
    stacked     = np.array(all_trial_rewards)
    mean_curve  = np.mean(stacked, axis=0)
    std_curve   = np.std(stacked, axis=0)
    num_points  = len(mean_curve)

    # Trailing moving average; the window is simply shorter near the start
    smoothed_curve = np.array([
        np.mean(mean_curve[max(0, idx - window + 1):idx + 1])
        for idx in range(num_points)
    ])

    # Faint gray line for every individual trial
    for single_trial in all_trial_rewards:
        plt.plot(single_trial, alpha=0.1, color='gray')

    # Smoothed mean with a band of one (unsmoothed) standard deviation around it
    x_axis = range(num_points)
    lower_band = smoothed_curve - std_curve
    upper_band = smoothed_curve + std_curve
    plt.plot(x_axis, smoothed_curve, linewidth=2, color='blue', label='Mean Reward')
    plt.fill_between(x_axis, lower_band, upper_band,
                     color='blue', alpha=0.2, label='Standard Deviation')

    # Labels, legend and grid
    plt.xlabel('Episode')
    plt.ylabel('Average Return')
    plt.title('PPO Training Progress - HalfCheetah-v4')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Write the figure to disk and release it
    plt.savefig('ppo_training_progress.png', dpi=300, bbox_inches='tight')
    plt.close()

In [None]:
def main():
    """
    Trains PPO for several independent trials, saves each trial's rewards to disk,
    and writes a summary plot of all trials.
    """
    env_name = "HalfCheetah-v4"
    num_trials = 5
    rewards_per_trial = []

    for trial in range(num_trials):
        # The trained agent is returned as well but is not needed here
        rewards, _ = run_single_trial(trial, env_name)
        rewards_per_trial.append(rewards)
        # Persist each trial's curve as soon as that trial finishes
        np.save(f'trial_rewards_{trial}.npy', rewards)

    # Generate and save final visualization
    plot_rewards_with_trials(rewards_per_trial)
    print("\nTraining progress plot saved as 'ppo_training_progress.png'")


if __name__ == "__main__":
    main()


Starting Trial 1/5


Trial 1:   0%|          | 1/1000 [00:01<19:46,  1.19s/it]


Trial 1, Episode 0, Average Reward: -268.52


Trial 1:   1%|          | 11/1000 [00:10<15:07,  1.09it/s]


Trial 1, Episode 10, Average Reward: -356.38


Trial 1:   2%|▏         | 21/1000 [00:19<14:35,  1.12it/s]


Trial 1, Episode 20, Average Reward: -314.88


Trial 1:   3%|▎         | 31/1000 [00:30<18:15,  1.13s/it]


Trial 1, Episode 30, Average Reward: -310.36


Trial 1:   4%|▍         | 41/1000 [00:42<17:37,  1.10s/it]


Trial 1, Episode 40, Average Reward: -290.38


Trial 1:   5%|▌         | 51/1000 [00:52<16:13,  1.03s/it]


Trial 1, Episode 50, Average Reward: -269.78


Trial 1:   6%|▌         | 61/1000 [01:03<17:54,  1.14s/it]


Trial 1, Episode 60, Average Reward: -249.71


Trial 1:   7%|▋         | 71/1000 [01:14<15:45,  1.02s/it]


Trial 1, Episode 70, Average Reward: -241.03


Trial 1:   8%|▊         | 81/1000 [01:24<15:12,  1.01it/s]


Trial 1, Episode 80, Average Reward: -216.82


Trial 1:   9%|▉         | 91/1000 [01:34<14:53,  1.02it/s]


Trial 1, Episode 90, Average Reward: -179.15


Trial 1:  10%|█         | 101/1000 [01:44<14:43,  1.02it/s]


Trial 1, Episode 100, Average Reward: -102.29


Trial 1:  11%|█         | 111/1000 [01:53<14:01,  1.06it/s]


Trial 1, Episode 110, Average Reward: 10.25


Trial 1:  12%|█▏        | 121/1000 [02:03<14:21,  1.02it/s]


Trial 1, Episode 120, Average Reward: 116.31


Trial 1:  13%|█▎        | 131/1000 [02:13<14:28,  1.00it/s]


Trial 1, Episode 130, Average Reward: 248.00


Trial 1:  14%|█▍        | 141/1000 [02:22<13:45,  1.04it/s]


Trial 1, Episode 140, Average Reward: 392.22


Trial 1:  15%|█▌        | 151/1000 [02:32<13:38,  1.04it/s]


Trial 1, Episode 150, Average Reward: 508.43


Trial 1:  16%|█▌        | 161/1000 [02:42<13:03,  1.07it/s]


Trial 1, Episode 160, Average Reward: 632.75


Trial 1:  17%|█▋        | 171/1000 [02:51<13:23,  1.03it/s]


Trial 1, Episode 170, Average Reward: 768.77


Trial 1:  18%|█▊        | 181/1000 [03:01<13:01,  1.05it/s]


Trial 1, Episode 180, Average Reward: 882.65


Trial 1:  19%|█▉        | 191/1000 [03:10<12:34,  1.07it/s]


Trial 1, Episode 190, Average Reward: 1013.96


Trial 1:  20%|██        | 201/1000 [03:20<12:23,  1.07it/s]


Trial 1, Episode 200, Average Reward: 1093.04


Trial 1:  21%|██        | 211/1000 [03:29<12:22,  1.06it/s]


Trial 1, Episode 210, Average Reward: 1132.15


Trial 1:  22%|██▏       | 221/1000 [03:39<14:13,  1.10s/it]


Trial 1, Episode 220, Average Reward: 1214.08


Trial 1:  23%|██▎       | 231/1000 [03:49<12:27,  1.03it/s]


Trial 1, Episode 230, Average Reward: 1259.79


Trial 1:  24%|██▍       | 241/1000 [03:59<11:47,  1.07it/s]


Trial 1, Episode 240, Average Reward: 1287.12


Trial 1:  25%|██▌       | 251/1000 [04:08<11:39,  1.07it/s]


Trial 1, Episode 250, Average Reward: 1376.21


Trial 1:  26%|██▌       | 261/1000 [04:17<11:48,  1.04it/s]


Trial 1, Episode 260, Average Reward: 1449.53


Trial 1:  27%|██▋       | 271/1000 [04:27<11:28,  1.06it/s]


Trial 1, Episode 270, Average Reward: 1479.07


Trial 1:  28%|██▊       | 281/1000 [04:37<11:18,  1.06it/s]


Trial 1, Episode 280, Average Reward: 1479.11


Trial 1:  29%|██▉       | 291/1000 [04:46<11:44,  1.01it/s]


Trial 1, Episode 290, Average Reward: 1404.61


Trial 1:  30%|███       | 301/1000 [04:56<11:19,  1.03it/s]


Trial 1, Episode 300, Average Reward: 1394.63


Trial 1:  31%|███       | 311/1000 [05:06<11:07,  1.03it/s]


Trial 1, Episode 310, Average Reward: 1469.21


Trial 1:  32%|███▏      | 321/1000 [05:15<10:39,  1.06it/s]


Trial 1, Episode 320, Average Reward: 1469.48


Trial 1:  33%|███▎      | 331/1000 [05:25<10:20,  1.08it/s]


Trial 1, Episode 330, Average Reward: 1528.36


Trial 1:  34%|███▍      | 341/1000 [05:35<10:35,  1.04it/s]


Trial 1, Episode 340, Average Reward: 1562.98


Trial 1:  35%|███▌      | 351/1000 [05:44<10:18,  1.05it/s]


Trial 1, Episode 350, Average Reward: 1571.02


Trial 1:  36%|███▌      | 361/1000 [05:54<10:10,  1.05it/s]


Trial 1, Episode 360, Average Reward: 1586.31


Trial 1:  37%|███▋      | 371/1000 [06:04<09:52,  1.06it/s]


Trial 1, Episode 370, Average Reward: 1681.57


Trial 1:  38%|███▊      | 381/1000 [06:13<09:54,  1.04it/s]


Trial 1, Episode 380, Average Reward: 1813.91


Trial 1:  39%|███▉      | 391/1000 [06:23<09:47,  1.04it/s]


Trial 1, Episode 390, Average Reward: 1935.52


Trial 1:  40%|████      | 401/1000 [06:33<09:40,  1.03it/s]


Trial 1, Episode 400, Average Reward: 2021.60


Trial 1:  41%|████      | 411/1000 [06:42<09:35,  1.02it/s]


Trial 1, Episode 410, Average Reward: 2044.50


Trial 1:  42%|████▏     | 421/1000 [06:52<09:08,  1.06it/s]


Trial 1, Episode 420, Average Reward: 2072.12


Trial 1:  43%|████▎     | 431/1000 [07:01<08:47,  1.08it/s]


Trial 1, Episode 430, Average Reward: 2058.32


Trial 1:  44%|████▍     | 441/1000 [07:11<09:22,  1.01s/it]


Trial 1, Episode 440, Average Reward: 2078.80


Trial 1:  45%|████▌     | 451/1000 [07:21<08:35,  1.06it/s]


Trial 1, Episode 450, Average Reward: 2117.07


Trial 1:  46%|████▌     | 461/1000 [07:30<08:32,  1.05it/s]


Trial 1, Episode 460, Average Reward: 2100.61


Trial 1:  47%|████▋     | 471/1000 [07:40<08:26,  1.04it/s]


Trial 1, Episode 470, Average Reward: 1998.57


Trial 1:  48%|████▊     | 481/1000 [07:49<08:28,  1.02it/s]


Trial 1, Episode 480, Average Reward: 1951.76


Trial 1:  49%|████▉     | 491/1000 [07:59<08:15,  1.03it/s]


Trial 1, Episode 490, Average Reward: 1961.41


Trial 1:  50%|█████     | 501/1000 [08:09<07:40,  1.08it/s]


Trial 1, Episode 500, Average Reward: 1982.04


Trial 1:  51%|█████     | 511/1000 [08:18<07:38,  1.07it/s]


Trial 1, Episode 510, Average Reward: 1995.15


Trial 1:  52%|█████▏    | 521/1000 [08:28<07:38,  1.05it/s]


Trial 1, Episode 520, Average Reward: 2042.49


Trial 1:  53%|█████▎    | 531/1000 [08:37<07:19,  1.07it/s]


Trial 1, Episode 530, Average Reward: 2087.65


Trial 1:  54%|█████▍    | 541/1000 [08:46<07:10,  1.07it/s]


Trial 1, Episode 540, Average Reward: 2069.25


Trial 1:  55%|█████▌    | 551/1000 [08:56<07:26,  1.00it/s]


Trial 1, Episode 550, Average Reward: 2057.79


Trial 1:  56%|█████▌    | 561/1000 [09:06<07:07,  1.03it/s]


Trial 1, Episode 560, Average Reward: 2122.54


Trial 1:  57%|█████▋    | 571/1000 [09:16<06:46,  1.06it/s]


Trial 1, Episode 570, Average Reward: 2228.89


Trial 1:  58%|█████▊    | 581/1000 [09:25<06:53,  1.01it/s]


Trial 1, Episode 580, Average Reward: 2277.57


Trial 1:  59%|█████▉    | 591/1000 [09:35<06:34,  1.04it/s]


Trial 1, Episode 590, Average Reward: 2263.86


Trial 1:  60%|██████    | 601/1000 [09:44<06:17,  1.06it/s]


Trial 1, Episode 600, Average Reward: 2276.28


Trial 1:  61%|██████    | 611/1000 [09:54<06:16,  1.03it/s]


Trial 1, Episode 610, Average Reward: 2233.31


Trial 1:  62%|██████▏   | 621/1000 [10:03<05:59,  1.05it/s]


Trial 1, Episode 620, Average Reward: 2232.41


Trial 1:  63%|██████▎   | 631/1000 [10:13<05:56,  1.03it/s]


Trial 1, Episode 630, Average Reward: 2237.53


Trial 1:  64%|██████▍   | 641/1000 [10:22<05:32,  1.08it/s]


Trial 1, Episode 640, Average Reward: 2289.30


Trial 1:  65%|██████▌   | 651/1000 [10:32<05:42,  1.02it/s]


Trial 1, Episode 650, Average Reward: 2272.57


Trial 1:  66%|██████▌   | 661/1000 [10:42<05:28,  1.03it/s]


Trial 1, Episode 660, Average Reward: 2264.55


Trial 1:  67%|██████▋   | 671/1000 [10:51<05:21,  1.02it/s]


Trial 1, Episode 670, Average Reward: 2229.61


Trial 1:  68%|██████▊   | 681/1000 [11:01<05:12,  1.02it/s]


Trial 1, Episode 680, Average Reward: 2245.43


Trial 1:  69%|██████▉   | 691/1000 [11:11<04:57,  1.04it/s]


Trial 1, Episode 690, Average Reward: 2324.11


Trial 1:  70%|███████   | 701/1000 [11:21<04:58,  1.00it/s]


Trial 1, Episode 700, Average Reward: 2281.40


Trial 1:  71%|███████   | 711/1000 [11:31<04:44,  1.02it/s]


Trial 1, Episode 710, Average Reward: 2344.27


Trial 1:  72%|███████▏  | 721/1000 [11:41<04:35,  1.01it/s]


Trial 1, Episode 720, Average Reward: 2299.37


Trial 1:  73%|███████▎  | 731/1000 [11:50<04:21,  1.03it/s]


Trial 1, Episode 730, Average Reward: 2249.13


Trial 1:  74%|███████▍  | 741/1000 [12:00<04:11,  1.03it/s]


Trial 1, Episode 740, Average Reward: 2257.61


Trial 1:  75%|███████▌  | 751/1000 [12:10<03:53,  1.07it/s]


Trial 1, Episode 750, Average Reward: 2309.29


Trial 1:  76%|███████▌  | 761/1000 [12:19<03:42,  1.08it/s]


Trial 1, Episode 760, Average Reward: 2331.16


Trial 1:  77%|███████▋  | 771/1000 [12:28<03:38,  1.05it/s]


Trial 1, Episode 770, Average Reward: 2362.02


Trial 1:  78%|███████▊  | 781/1000 [12:38<03:33,  1.03it/s]


Trial 1, Episode 780, Average Reward: 2361.79


Trial 1:  79%|███████▉  | 791/1000 [12:48<03:21,  1.04it/s]


Trial 1, Episode 790, Average Reward: 2364.61


Trial 1:  80%|████████  | 801/1000 [12:58<03:13,  1.03it/s]


Trial 1, Episode 800, Average Reward: 2439.79


Trial 1:  81%|████████  | 811/1000 [13:08<03:05,  1.02it/s]


Trial 1, Episode 810, Average Reward: 2480.75


Trial 1:  82%|████████▏ | 821/1000 [13:17<02:57,  1.01it/s]


Trial 1, Episode 820, Average Reward: 2593.11


Trial 1:  83%|████████▎ | 831/1000 [13:27<02:52,  1.02s/it]


Trial 1, Episode 830, Average Reward: 2695.56


Trial 1:  84%|████████▍ | 841/1000 [13:37<02:28,  1.07it/s]


Trial 1, Episode 840, Average Reward: 2742.95


Trial 1:  85%|████████▌ | 851/1000 [13:47<02:23,  1.04it/s]


Trial 1, Episode 850, Average Reward: 2701.75


Trial 1:  86%|████████▌ | 861/1000 [13:56<02:16,  1.02it/s]


Trial 1, Episode 860, Average Reward: 2737.99


Trial 1:  87%|████████▋ | 871/1000 [14:06<02:11,  1.02s/it]


Trial 1, Episode 870, Average Reward: 2769.91


Trial 1:  88%|████████▊ | 881/1000 [14:17<02:08,  1.08s/it]


Trial 1, Episode 880, Average Reward: 2803.90


Trial 1:  89%|████████▉ | 891/1000 [14:27<01:48,  1.01it/s]


Trial 1, Episode 890, Average Reward: 2847.31


Trial 1:  90%|█████████ | 901/1000 [14:37<01:37,  1.02it/s]


Trial 1, Episode 900, Average Reward: 2874.32


Trial 1:  91%|█████████ | 911/1000 [14:47<01:22,  1.08it/s]


Trial 1, Episode 910, Average Reward: 2867.67


Trial 1:  92%|█████████▏| 921/1000 [14:57<01:16,  1.03it/s]


Trial 1, Episode 920, Average Reward: 2843.62


Trial 1:  93%|█████████▎| 931/1000 [15:06<01:03,  1.09it/s]


Trial 1, Episode 930, Average Reward: 2832.32


Trial 1:  94%|█████████▍| 941/1000 [15:15<00:55,  1.06it/s]


Trial 1, Episode 940, Average Reward: 2837.42


Trial 1:  95%|█████████▌| 951/1000 [15:25<00:47,  1.03it/s]


Trial 1, Episode 950, Average Reward: 2919.04


Trial 1:  96%|█████████▌| 961/1000 [15:35<00:38,  1.02it/s]


Trial 1, Episode 960, Average Reward: 2926.14


Trial 1:  97%|█████████▋| 971/1000 [15:44<00:27,  1.06it/s]


Trial 1, Episode 970, Average Reward: 2970.90


Trial 1:  98%|█████████▊| 981/1000 [15:53<00:17,  1.09it/s]


Trial 1, Episode 980, Average Reward: 2992.80


Trial 1:  99%|█████████▉| 991/1000 [16:03<00:08,  1.07it/s]


Trial 1, Episode 990, Average Reward: 2991.09


Trial 1: 100%|██████████| 1000/1000 [16:11<00:00,  1.03it/s]


Video saved as 'ppo_halfcheetah_trial_1.mp4'

Starting Trial 2/5


Trial 2:   0%|          | 1/1000 [00:01<16:42,  1.00s/it]


Trial 2, Episode 0, Average Reward: -764.96


Trial 2:   1%|          | 11/1000 [00:10<16:32,  1.00s/it]


Trial 2, Episode 10, Average Reward: -537.75


Trial 2:   2%|▏         | 21/1000 [00:21<16:00,  1.02it/s]


Trial 2, Episode 20, Average Reward: -438.37


Trial 2:   3%|▎         | 31/1000 [00:30<15:39,  1.03it/s]


Trial 2, Episode 30, Average Reward: -392.55


Trial 2:   4%|▍         | 41/1000 [00:41<18:07,  1.13s/it]


Trial 2, Episode 40, Average Reward: -352.98


Trial 2:   5%|▌         | 51/1000 [00:52<15:59,  1.01s/it]


Trial 2, Episode 50, Average Reward: -326.29


Trial 2:   6%|▌         | 61/1000 [01:02<16:38,  1.06s/it]


Trial 2, Episode 60, Average Reward: -311.00


Trial 2:   7%|▋         | 71/1000 [01:13<15:41,  1.01s/it]


Trial 2, Episode 70, Average Reward: -276.39


Trial 2:   8%|▊         | 81/1000 [01:23<15:41,  1.02s/it]


Trial 2, Episode 80, Average Reward: -249.45


Trial 2:   9%|▉         | 91/1000 [01:34<15:51,  1.05s/it]


Trial 2, Episode 90, Average Reward: -236.03


Trial 2:  10%|█         | 101/1000 [01:44<15:16,  1.02s/it]


Trial 2, Episode 100, Average Reward: -213.00


Trial 2:  11%|█         | 111/1000 [01:55<15:32,  1.05s/it]


Trial 2, Episode 110, Average Reward: -150.83


Trial 2:  12%|█▏        | 121/1000 [02:05<14:59,  1.02s/it]


Trial 2, Episode 120, Average Reward: -114.10


Trial 2:  13%|█▎        | 131/1000 [02:15<14:58,  1.03s/it]


Trial 2, Episode 130, Average Reward: -80.18


Trial 2:  14%|█▍        | 141/1000 [02:25<14:10,  1.01it/s]


Trial 2, Episode 140, Average Reward: -40.79


Trial 2:  15%|█▌        | 151/1000 [02:35<13:37,  1.04it/s]


Trial 2, Episode 150, Average Reward: 41.28


Trial 2:  16%|█▌        | 161/1000 [02:45<13:45,  1.02it/s]


Trial 2, Episode 160, Average Reward: 109.98


Trial 2:  17%|█▋        | 171/1000 [02:55<13:40,  1.01it/s]


Trial 2, Episode 170, Average Reward: 200.21


Trial 2:  18%|█▊        | 181/1000 [03:05<13:40,  1.00s/it]


Trial 2, Episode 180, Average Reward: 287.96


Trial 2:  19%|█▉        | 191/1000 [03:16<13:22,  1.01it/s]


Trial 2, Episode 190, Average Reward: 351.48


Trial 2:  20%|██        | 201/1000 [03:26<13:41,  1.03s/it]


Trial 2, Episode 200, Average Reward: 433.78


Trial 2:  21%|██        | 211/1000 [03:36<13:07,  1.00it/s]


Trial 2, Episode 210, Average Reward: 499.68


Trial 2:  22%|██▏       | 221/1000 [03:47<13:38,  1.05s/it]


Trial 2, Episode 220, Average Reward: 620.82


Trial 2:  23%|██▎       | 231/1000 [03:57<13:13,  1.03s/it]


Trial 2, Episode 230, Average Reward: 696.92


Trial 2:  24%|██▍       | 241/1000 [04:07<13:17,  1.05s/it]


Trial 2, Episode 240, Average Reward: 805.71


Trial 2:  25%|██▌       | 251/1000 [04:17<12:35,  1.01s/it]


Trial 2, Episode 250, Average Reward: 839.41


Trial 2:  26%|██▌       | 261/1000 [04:28<13:12,  1.07s/it]


Trial 2, Episode 260, Average Reward: 914.50


Trial 2:  27%|██▋       | 271/1000 [04:39<12:35,  1.04s/it]


Trial 2, Episode 270, Average Reward: 897.91


Trial 2:  28%|██▊       | 281/1000 [04:49<12:27,  1.04s/it]


Trial 2, Episode 280, Average Reward: 968.71


Trial 2:  29%|██▉       | 291/1000 [05:01<16:22,  1.39s/it]


Trial 2, Episode 290, Average Reward: 969.60


Trial 2:  30%|███       | 301/1000 [05:11<11:26,  1.02it/s]


Trial 2, Episode 300, Average Reward: 926.56


Trial 2:  31%|███       | 311/1000 [05:21<11:02,  1.04it/s]


Trial 2, Episode 310, Average Reward: 947.78


Trial 2:  32%|███▏      | 321/1000 [05:30<10:47,  1.05it/s]


Trial 2, Episode 320, Average Reward: 891.28


Trial 2:  33%|███▎      | 331/1000 [05:40<10:35,  1.05it/s]


Trial 2, Episode 330, Average Reward: 931.43


Trial 2:  34%|███▍      | 341/1000 [05:49<10:23,  1.06it/s]


Trial 2, Episode 340, Average Reward: 935.22


Trial 2:  35%|███▌      | 351/1000 [05:59<10:25,  1.04it/s]


Trial 2, Episode 350, Average Reward: 935.20


Trial 2:  36%|███▌      | 361/1000 [06:09<10:06,  1.05it/s]


Trial 2, Episode 360, Average Reward: 967.40


Trial 2:  37%|███▋      | 371/1000 [06:18<10:00,  1.05it/s]


Trial 2, Episode 370, Average Reward: 1033.52


Trial 2:  38%|███▊      | 381/1000 [06:28<09:46,  1.05it/s]


Trial 2, Episode 380, Average Reward: 993.80


Trial 2:  39%|███▉      | 391/1000 [06:38<09:40,  1.05it/s]


Trial 2, Episode 390, Average Reward: 1096.44


Trial 2:  40%|████      | 401/1000 [06:47<09:20,  1.07it/s]


Trial 2, Episode 400, Average Reward: 1210.68


Trial 2:  41%|████      | 411/1000 [06:57<09:15,  1.06it/s]


Trial 2, Episode 410, Average Reward: 1251.17


Trial 2:  42%|████▏     | 421/1000 [07:06<09:16,  1.04it/s]


Trial 2, Episode 420, Average Reward: 1360.34


Trial 2:  43%|████▎     | 431/1000 [07:16<08:57,  1.06it/s]


Trial 2, Episode 430, Average Reward: 1385.83


Trial 2:  44%|████▍     | 441/1000 [07:25<08:52,  1.05it/s]


Trial 2, Episode 440, Average Reward: 1414.79


Trial 2:  45%|████▌     | 451/1000 [07:36<08:50,  1.03it/s]


Trial 2, Episode 450, Average Reward: 1481.12


Trial 2:  46%|████▌     | 461/1000 [07:46<09:34,  1.07s/it]


Trial 2, Episode 460, Average Reward: 1490.93


Trial 2:  47%|████▋     | 471/1000 [07:55<07:49,  1.13it/s]


Trial 2, Episode 470, Average Reward: 1529.56


Trial 2:  48%|████▊     | 481/1000 [08:04<07:34,  1.14it/s]


Trial 2, Episode 480, Average Reward: 1535.99


Trial 2:  49%|████▉     | 491/1000 [08:13<07:41,  1.10it/s]


Trial 2, Episode 490, Average Reward: 1512.63


Trial 2:  50%|█████     | 501/1000 [08:22<07:16,  1.14it/s]


Trial 2, Episode 500, Average Reward: 1467.22


Trial 2:  51%|█████     | 511/1000 [08:31<06:59,  1.17it/s]


Trial 2, Episode 510, Average Reward: 1451.95


Trial 2:  52%|█████▏    | 521/1000 [08:39<06:54,  1.16it/s]


Trial 2, Episode 520, Average Reward: 1454.27


Trial 2:  53%|█████▎    | 531/1000 [08:48<06:50,  1.14it/s]


Trial 2, Episode 530, Average Reward: 1489.22


Trial 2:  54%|█████▍    | 541/1000 [08:57<06:37,  1.16it/s]


Trial 2, Episode 540, Average Reward: 1523.44


Trial 2:  55%|█████▌    | 551/1000 [09:05<06:27,  1.16it/s]


Trial 2, Episode 550, Average Reward: 1517.84


Trial 2:  56%|█████▌    | 561/1000 [09:14<06:17,  1.16it/s]


Trial 2, Episode 560, Average Reward: 1513.65


Trial 2:  57%|█████▋    | 571/1000 [09:23<06:09,  1.16it/s]


Trial 2, Episode 570, Average Reward: 1485.01


Trial 2:  58%|█████▊    | 581/1000 [09:31<06:01,  1.16it/s]


Trial 2, Episode 580, Average Reward: 1548.95


Trial 2:  59%|█████▉    | 591/1000 [09:40<05:51,  1.16it/s]


Trial 2, Episode 590, Average Reward: 1551.98


Trial 2:  60%|██████    | 601/1000 [09:49<05:57,  1.11it/s]


Trial 2, Episode 600, Average Reward: 1589.56


Trial 2:  61%|██████    | 611/1000 [09:57<05:34,  1.16it/s]


Trial 2, Episode 610, Average Reward: 1682.56


Trial 2:  62%|██████▏   | 621/1000 [10:06<05:23,  1.17it/s]


Trial 2, Episode 620, Average Reward: 1692.35


Trial 2:  63%|██████▎   | 631/1000 [10:15<05:17,  1.16it/s]


Trial 2, Episode 630, Average Reward: 1690.50


Trial 2:  64%|██████▍   | 641/1000 [10:25<05:38,  1.06it/s]


Trial 2, Episode 640, Average Reward: 1636.20


Trial 2:  65%|██████▌   | 651/1000 [10:33<05:09,  1.13it/s]


Trial 2, Episode 650, Average Reward: 1691.18


Trial 2:  66%|██████▌   | 661/1000 [10:43<05:07,  1.10it/s]


Trial 2, Episode 660, Average Reward: 1719.73


Trial 2:  67%|██████▋   | 671/1000 [10:52<05:06,  1.07it/s]


Trial 2, Episode 670, Average Reward: 1787.03


Trial 2:  68%|██████▊   | 681/1000 [11:02<04:57,  1.07it/s]


Trial 2, Episode 680, Average Reward: 1789.27


Trial 2:  69%|██████▉   | 691/1000 [11:11<04:44,  1.08it/s]


Trial 2, Episode 690, Average Reward: 1812.20


Trial 2:  70%|███████   | 701/1000 [11:21<04:40,  1.07it/s]


Trial 2, Episode 700, Average Reward: 1835.98


Trial 2:  71%|███████   | 711/1000 [11:30<04:26,  1.08it/s]


Trial 2, Episode 710, Average Reward: 1773.87


Trial 2:  72%|███████▏  | 721/1000 [11:39<04:20,  1.07it/s]


Trial 2, Episode 720, Average Reward: 1702.88


Trial 2:  73%|███████▎  | 731/1000 [11:49<04:07,  1.09it/s]


Trial 2, Episode 730, Average Reward: 1682.50


Trial 2:  74%|███████▍  | 741/1000 [11:58<04:05,  1.06it/s]


Trial 2, Episode 740, Average Reward: 1721.42


Trial 2:  75%|███████▌  | 751/1000 [12:07<03:51,  1.08it/s]


Trial 2, Episode 750, Average Reward: 1694.14


Trial 2:  76%|███████▌  | 761/1000 [12:17<03:39,  1.09it/s]


Trial 2, Episode 760, Average Reward: 1643.29


Trial 2:  77%|███████▋  | 771/1000 [12:26<03:29,  1.09it/s]


Trial 2, Episode 770, Average Reward: 1588.43


Trial 2:  78%|███████▊  | 781/1000 [12:35<03:23,  1.07it/s]


Trial 2, Episode 780, Average Reward: 1563.18


Trial 2:  79%|███████▉  | 791/1000 [12:45<03:13,  1.08it/s]


Trial 2, Episode 790, Average Reward: 1607.02


Trial 2:  80%|████████  | 801/1000 [12:54<03:02,  1.09it/s]


Trial 2, Episode 800, Average Reward: 1579.70


Trial 2:  81%|████████  | 811/1000 [13:03<02:54,  1.08it/s]


Trial 2, Episode 810, Average Reward: 1616.74


Trial 2:  82%|████████▏ | 821/1000 [13:13<02:48,  1.06it/s]


Trial 2, Episode 820, Average Reward: 1711.28


Trial 2:  83%|████████▎ | 831/1000 [13:22<02:38,  1.07it/s]


Trial 2, Episode 830, Average Reward: 1704.00


Trial 2:  84%|████████▍ | 841/1000 [13:31<02:27,  1.08it/s]


Trial 2, Episode 840, Average Reward: 1707.50


Trial 2:  85%|████████▌ | 851/1000 [13:41<02:17,  1.08it/s]


Trial 2, Episode 850, Average Reward: 1682.50


Trial 2:  86%|████████▌ | 861/1000 [13:50<02:08,  1.08it/s]


Trial 2, Episode 860, Average Reward: 1674.40


Trial 2:  87%|████████▋ | 871/1000 [13:59<01:58,  1.09it/s]


Trial 2, Episode 870, Average Reward: 1682.34


Trial 2:  88%|████████▊ | 881/1000 [14:09<01:49,  1.09it/s]


Trial 2, Episode 880, Average Reward: 1758.95


Trial 2:  89%|████████▉ | 891/1000 [14:18<01:41,  1.08it/s]


Trial 2, Episode 890, Average Reward: 1741.28


Trial 2:  90%|█████████ | 901/1000 [14:27<01:31,  1.08it/s]


Trial 2, Episode 900, Average Reward: 1830.65


Trial 2:  91%|█████████ | 911/1000 [14:37<01:25,  1.04it/s]


Trial 2, Episode 910, Average Reward: 1813.17


Trial 2:  92%|█████████▏| 921/1000 [14:46<01:14,  1.05it/s]


Trial 2, Episode 920, Average Reward: 1748.15


Trial 2:  93%|█████████▎| 931/1000 [14:56<01:03,  1.08it/s]


Trial 2, Episode 930, Average Reward: 1819.64


Trial 2:  94%|█████████▍| 941/1000 [15:05<00:54,  1.07it/s]


Trial 2, Episode 940, Average Reward: 1913.04


Trial 2:  95%|█████████▌| 951/1000 [15:14<00:45,  1.09it/s]


Trial 2, Episode 950, Average Reward: 1985.26


Trial 2:  96%|█████████▌| 961/1000 [15:24<00:36,  1.08it/s]


Trial 2, Episode 960, Average Reward: 2051.69


Trial 2:  97%|█████████▋| 971/1000 [15:33<00:26,  1.08it/s]


Trial 2, Episode 970, Average Reward: 2087.21


Trial 2:  98%|█████████▊| 981/1000 [15:42<00:17,  1.08it/s]


Trial 2, Episode 980, Average Reward: 2124.61


Trial 2:  99%|█████████▉| 991/1000 [15:52<00:08,  1.04it/s]


Trial 2, Episode 990, Average Reward: 2200.27


Trial 2: 100%|██████████| 1000/1000 [16:00<00:00,  1.04it/s]


Video saved as 'ppo_halfcheetah_trial_2.mp4'

Starting Trial 3/5


Trial 3:   0%|          | 1/1000 [00:01<17:56,  1.08s/it]


Trial 3, Episode 0, Average Reward: -521.39


Trial 3:   1%|          | 11/1000 [00:10<15:09,  1.09it/s]


Trial 3, Episode 10, Average Reward: -467.42


Trial 3:   2%|▏         | 21/1000 [00:19<15:05,  1.08it/s]


Trial 3, Episode 20, Average Reward: -416.01


Trial 3:   3%|▎         | 31/1000 [00:28<14:50,  1.09it/s]


Trial 3, Episode 30, Average Reward: -375.70


Trial 3:   4%|▍         | 41/1000 [00:38<14:54,  1.07it/s]


Trial 3, Episode 40, Average Reward: -366.20


Trial 3:   5%|▌         | 51/1000 [00:47<14:34,  1.09it/s]


Trial 3, Episode 50, Average Reward: -314.79


Trial 3:   6%|▌         | 61/1000 [00:56<14:05,  1.11it/s]


Trial 3, Episode 60, Average Reward: -276.00


Trial 3:   7%|▋         | 71/1000 [01:06<14:56,  1.04it/s]


Trial 3, Episode 70, Average Reward: -264.04


Trial 3:   8%|▊         | 81/1000 [01:15<14:03,  1.09it/s]


Trial 3, Episode 80, Average Reward: -250.52


Trial 3:   9%|▉         | 91/1000 [01:25<16:35,  1.09s/it]


Trial 3, Episode 90, Average Reward: -228.38


Trial 3:  10%|█         | 101/1000 [01:35<14:35,  1.03it/s]


Trial 3, Episode 100, Average Reward: -216.73


Trial 3:  11%|█         | 111/1000 [01:45<13:55,  1.06it/s]


Trial 3, Episode 110, Average Reward: -177.04


Trial 3:  12%|█▏        | 121/1000 [01:53<12:49,  1.14it/s]


Trial 3, Episode 120, Average Reward: -142.24


Trial 3:  13%|█▎        | 131/1000 [02:03<13:33,  1.07it/s]


Trial 3, Episode 130, Average Reward: -105.78


Trial 3:  14%|█▍        | 141/1000 [02:11<12:24,  1.15it/s]


Trial 3, Episode 140, Average Reward: -57.77


Trial 3:  15%|█▌        | 151/1000 [02:22<17:14,  1.22s/it]


Trial 3, Episode 150, Average Reward: -40.86


Trial 3:  16%|█▌        | 161/1000 [02:31<12:25,  1.12it/s]


Trial 3, Episode 160, Average Reward: -35.48


Trial 3:  17%|█▋        | 171/1000 [02:42<15:25,  1.12s/it]


Trial 3, Episode 170, Average Reward: 4.81


Trial 3:  18%|█▊        | 181/1000 [02:52<13:40,  1.00s/it]


Trial 3, Episode 180, Average Reward: 39.08


Trial 3:  19%|█▉        | 191/1000 [03:01<12:15,  1.10it/s]


Trial 3, Episode 190, Average Reward: 74.46


Trial 3:  20%|██        | 201/1000 [03:11<12:51,  1.04it/s]


Trial 3, Episode 200, Average Reward: 98.58


Trial 3:  21%|██        | 211/1000 [03:19<11:25,  1.15it/s]


Trial 3, Episode 210, Average Reward: 177.17


Trial 3:  22%|██▏       | 221/1000 [03:28<11:24,  1.14it/s]


Trial 3, Episode 220, Average Reward: 239.65


Trial 3:  23%|██▎       | 231/1000 [03:37<11:03,  1.16it/s]


Trial 3, Episode 230, Average Reward: 314.03


Trial 3:  24%|██▍       | 241/1000 [03:45<10:55,  1.16it/s]


Trial 3, Episode 240, Average Reward: 391.47


Trial 3:  25%|██▌       | 251/1000 [03:54<10:45,  1.16it/s]


Trial 3, Episode 250, Average Reward: 480.50


Trial 3:  26%|██▌       | 261/1000 [04:03<10:44,  1.15it/s]


Trial 3, Episode 260, Average Reward: 618.66


Trial 3:  27%|██▋       | 271/1000 [04:12<10:31,  1.16it/s]


Trial 3, Episode 270, Average Reward: 705.98


Trial 3:  28%|██▊       | 281/1000 [04:20<10:20,  1.16it/s]


Trial 3, Episode 280, Average Reward: 851.64


Trial 3:  29%|██▉       | 291/1000 [04:29<10:22,  1.14it/s]


Trial 3, Episode 290, Average Reward: 1021.04


Trial 3:  30%|███       | 301/1000 [04:38<10:04,  1.16it/s]


Trial 3, Episode 300, Average Reward: 1151.05


Trial 3:  31%|███       | 311/1000 [04:46<09:53,  1.16it/s]


Trial 3, Episode 310, Average Reward: 1263.21


Trial 3:  32%|███▏      | 321/1000 [04:55<10:02,  1.13it/s]


Trial 3, Episode 320, Average Reward: 1322.52


Trial 3:  33%|███▎      | 331/1000 [05:04<09:39,  1.15it/s]


Trial 3, Episode 330, Average Reward: 1320.18


Trial 3:  34%|███▍      | 341/1000 [05:13<09:28,  1.16it/s]


Trial 3, Episode 340, Average Reward: 1368.15


Trial 3:  35%|███▌      | 351/1000 [05:21<09:24,  1.15it/s]


Trial 3, Episode 350, Average Reward: 1476.26


Trial 3:  36%|███▌      | 361/1000 [05:30<09:12,  1.16it/s]


Trial 3, Episode 360, Average Reward: 1465.94


Trial 3:  37%|███▋      | 371/1000 [05:39<09:01,  1.16it/s]


Trial 3, Episode 370, Average Reward: 1507.78


Trial 3:  38%|███▊      | 381/1000 [05:47<08:55,  1.15it/s]


Trial 3, Episode 380, Average Reward: 1446.67


Trial 3:  39%|███▉      | 391/1000 [05:56<08:44,  1.16it/s]


Trial 3, Episode 390, Average Reward: 1436.00


Trial 3:  40%|████      | 401/1000 [06:05<08:39,  1.15it/s]


Trial 3, Episode 400, Average Reward: 1441.22


Trial 3:  41%|████      | 411/1000 [06:13<08:29,  1.16it/s]


Trial 3, Episode 410, Average Reward: 1444.70


Trial 3:  42%|████▏     | 421/1000 [06:22<08:20,  1.16it/s]


Trial 3, Episode 420, Average Reward: 1557.82


Trial 3:  43%|████▎     | 431/1000 [06:31<08:12,  1.16it/s]


Trial 3, Episode 430, Average Reward: 1668.80


Trial 3:  44%|████▍     | 441/1000 [06:39<08:05,  1.15it/s]


Trial 3, Episode 440, Average Reward: 1635.43


Trial 3:  45%|████▌     | 451/1000 [06:48<07:54,  1.16it/s]


Trial 3, Episode 450, Average Reward: 1570.55


Trial 3:  46%|████▌     | 461/1000 [06:57<07:42,  1.17it/s]


Trial 3, Episode 460, Average Reward: 1620.89


Trial 3:  47%|████▋     | 471/1000 [07:05<07:55,  1.11it/s]


Trial 3, Episode 470, Average Reward: 1611.49


Trial 3:  48%|████▊     | 481/1000 [07:14<07:33,  1.14it/s]


Trial 3, Episode 480, Average Reward: 1646.52


Trial 3:  49%|████▉     | 491/1000 [07:23<07:13,  1.18it/s]


Trial 3, Episode 490, Average Reward: 1572.66


Trial 3:  50%|█████     | 501/1000 [07:31<07:07,  1.17it/s]


Trial 3, Episode 500, Average Reward: 1608.90


Trial 3:  51%|█████     | 511/1000 [07:40<07:00,  1.16it/s]


Trial 3, Episode 510, Average Reward: 1542.02


Trial 3:  52%|█████▏    | 521/1000 [07:49<06:48,  1.17it/s]


Trial 3, Episode 520, Average Reward: 1409.20


Trial 3:  53%|█████▎    | 531/1000 [07:57<06:41,  1.17it/s]


Trial 3, Episode 530, Average Reward: 1320.90


Trial 3:  54%|█████▍    | 541/1000 [08:06<06:38,  1.15it/s]


Trial 3, Episode 540, Average Reward: 1353.09


Trial 3:  55%|█████▌    | 551/1000 [08:15<06:27,  1.16it/s]


Trial 3, Episode 550, Average Reward: 1392.69


Trial 3:  56%|█████▌    | 561/1000 [08:23<06:16,  1.17it/s]


Trial 3, Episode 560, Average Reward: 1343.38


Trial 3:  57%|█████▋    | 571/1000 [08:32<06:22,  1.12it/s]


Trial 3, Episode 570, Average Reward: 1299.56


Trial 3:  58%|█████▊    | 581/1000 [08:41<06:02,  1.16it/s]


Trial 3, Episode 580, Average Reward: 1317.82


Trial 3:  59%|█████▉    | 591/1000 [08:50<05:52,  1.16it/s]


Trial 3, Episode 590, Average Reward: 1339.25


Trial 3:  60%|██████    | 601/1000 [08:58<05:41,  1.17it/s]


Trial 3, Episode 600, Average Reward: 1253.61


Trial 3:  61%|██████    | 611/1000 [09:08<06:03,  1.07it/s]


Trial 3, Episode 610, Average Reward: 1261.75


Trial 3:  62%|██████▏   | 621/1000 [09:16<05:28,  1.15it/s]


Trial 3, Episode 620, Average Reward: 1285.96


Trial 3:  63%|██████▎   | 631/1000 [09:25<05:19,  1.16it/s]


Trial 3, Episode 630, Average Reward: 1379.07


Trial 3:  64%|██████▍   | 641/1000 [09:34<05:10,  1.16it/s]


Trial 3, Episode 640, Average Reward: 1398.99


Trial 3:  65%|██████▌   | 651/1000 [09:42<04:59,  1.16it/s]


Trial 3, Episode 650, Average Reward: 1400.37


Trial 3:  66%|██████▌   | 661/1000 [09:51<04:53,  1.16it/s]


Trial 3, Episode 660, Average Reward: 1420.57


Trial 3:  67%|██████▋   | 671/1000 [10:00<04:47,  1.14it/s]


Trial 3, Episode 670, Average Reward: 1452.93


Trial 3:  68%|██████▊   | 681/1000 [10:08<04:35,  1.16it/s]


Trial 3, Episode 680, Average Reward: 1472.12


Trial 3:  69%|██████▉   | 691/1000 [10:17<04:28,  1.15it/s]


Trial 3, Episode 690, Average Reward: 1488.21


Trial 3:  70%|███████   | 701/1000 [10:26<04:16,  1.17it/s]


Trial 3, Episode 700, Average Reward: 1576.49


Trial 3:  71%|███████   | 711/1000 [10:34<04:08,  1.16it/s]


Trial 3, Episode 710, Average Reward: 1628.84


Trial 3:  72%|███████▏  | 721/1000 [10:43<03:57,  1.17it/s]


Trial 3, Episode 720, Average Reward: 1744.64


Trial 3:  73%|███████▎  | 731/1000 [10:51<03:50,  1.17it/s]


Trial 3, Episode 730, Average Reward: 1760.68


Trial 3:  74%|███████▍  | 741/1000 [11:00<03:40,  1.17it/s]


Trial 3, Episode 740, Average Reward: 1837.84


Trial 3:  75%|███████▌  | 751/1000 [11:09<03:32,  1.17it/s]


Trial 3, Episode 750, Average Reward: 1874.36


Trial 3:  76%|███████▌  | 761/1000 [11:17<03:25,  1.16it/s]


Trial 3, Episode 760, Average Reward: 1924.21


Trial 3:  77%|███████▋  | 771/1000 [11:26<03:14,  1.18it/s]


Trial 3, Episode 770, Average Reward: 1951.56


Trial 3:  78%|███████▊  | 781/1000 [11:35<03:16,  1.12it/s]


Trial 3, Episode 780, Average Reward: 1928.15


Trial 3:  79%|███████▉  | 791/1000 [11:43<02:58,  1.17it/s]


Trial 3, Episode 790, Average Reward: 1896.36


Trial 3:  80%|████████  | 801/1000 [11:52<02:50,  1.17it/s]


Trial 3, Episode 800, Average Reward: 1882.08


Trial 3:  81%|████████  | 811/1000 [12:00<02:40,  1.18it/s]


Trial 3, Episode 810, Average Reward: 1904.70


Trial 3:  82%|████████▏ | 821/1000 [12:09<02:31,  1.18it/s]


Trial 3, Episode 820, Average Reward: 1862.98


Trial 3:  83%|████████▎ | 831/1000 [12:18<02:27,  1.15it/s]


Trial 3, Episode 830, Average Reward: 1841.18


Trial 3:  84%|████████▍ | 841/1000 [12:26<02:16,  1.17it/s]


Trial 3, Episode 840, Average Reward: 1826.16


Trial 3:  85%|████████▌ | 851/1000 [12:35<02:06,  1.18it/s]


Trial 3, Episode 850, Average Reward: 1799.21


Trial 3:  86%|████████▌ | 861/1000 [12:43<01:58,  1.18it/s]


Trial 3, Episode 860, Average Reward: 1861.14


Trial 3:  87%|████████▋ | 871/1000 [12:52<01:51,  1.15it/s]


Trial 3, Episode 870, Average Reward: 1955.16


Trial 3:  88%|████████▊ | 881/1000 [13:00<01:41,  1.17it/s]


Trial 3, Episode 880, Average Reward: 2100.03


Trial 3:  89%|████████▉ | 891/1000 [13:09<01:32,  1.17it/s]


Trial 3, Episode 890, Average Reward: 2183.58


Trial 3:  90%|█████████ | 901/1000 [13:18<01:25,  1.15it/s]


Trial 3, Episode 900, Average Reward: 2233.85


Trial 3:  91%|█████████ | 911/1000 [13:26<01:16,  1.16it/s]


Trial 3, Episode 910, Average Reward: 2266.32


Trial 3:  92%|█████████▏| 921/1000 [13:35<01:07,  1.17it/s]


Trial 3, Episode 920, Average Reward: 2258.42


Trial 3:  93%|█████████▎| 931/1000 [13:44<01:03,  1.08it/s]


Trial 3, Episode 930, Average Reward: 2377.63


Trial 3:  94%|█████████▍| 941/1000 [13:52<00:51,  1.14it/s]


Trial 3, Episode 940, Average Reward: 2414.41


Trial 3:  95%|█████████▌| 951/1000 [14:01<00:42,  1.16it/s]


Trial 3, Episode 950, Average Reward: 2504.75


Trial 3:  96%|█████████▌| 961/1000 [14:10<00:33,  1.16it/s]


Trial 3, Episode 960, Average Reward: 2457.62


Trial 3:  97%|█████████▋| 971/1000 [14:18<00:25,  1.14it/s]


Trial 3, Episode 970, Average Reward: 2430.82


Trial 3:  98%|█████████▊| 981/1000 [14:27<00:16,  1.17it/s]


Trial 3, Episode 980, Average Reward: 2355.79


Trial 3:  99%|█████████▉| 991/1000 [14:36<00:07,  1.17it/s]


Trial 3, Episode 990, Average Reward: 2313.26


Trial 3: 100%|██████████| 1000/1000 [14:43<00:00,  1.13it/s]


Video saved as 'ppo_halfcheetah_trial_3.mp4'

Starting Trial 4/5


Trial 4:   0%|          | 1/1000 [00:00<16:13,  1.03it/s]


Trial 4, Episode 0, Average Reward: -647.42


Trial 4:   1%|          | 11/1000 [00:09<14:30,  1.14it/s]


Trial 4, Episode 10, Average Reward: -573.00


Trial 4:   2%|▏         | 21/1000 [00:19<14:28,  1.13it/s]


Trial 4, Episode 20, Average Reward: -554.74


Trial 4:   3%|▎         | 31/1000 [00:27<13:54,  1.16it/s]


Trial 4, Episode 30, Average Reward: -515.00


Trial 4:   4%|▍         | 41/1000 [00:36<13:54,  1.15it/s]


Trial 4, Episode 40, Average Reward: -482.17


Trial 4:   5%|▌         | 51/1000 [00:45<13:50,  1.14it/s]


Trial 4, Episode 50, Average Reward: -454.18


Trial 4:   6%|▌         | 61/1000 [00:53<13:32,  1.16it/s]


Trial 4, Episode 60, Average Reward: -435.28


Trial 4:   7%|▋         | 71/1000 [01:02<13:20,  1.16it/s]


Trial 4, Episode 70, Average Reward: -402.69


Trial 4:   8%|▊         | 81/1000 [01:11<13:11,  1.16it/s]


Trial 4, Episode 80, Average Reward: -362.64


Trial 4:   9%|▉         | 91/1000 [01:19<13:11,  1.15it/s]


Trial 4, Episode 90, Average Reward: -323.76


Trial 4:  10%|█         | 101/1000 [01:28<12:49,  1.17it/s]


Trial 4, Episode 100, Average Reward: -278.28


Trial 4:  11%|█         | 111/1000 [01:37<12:49,  1.15it/s]


Trial 4, Episode 110, Average Reward: -204.15


Trial 4:  12%|█▏        | 121/1000 [01:45<12:36,  1.16it/s]


Trial 4, Episode 120, Average Reward: -129.46


Trial 4:  13%|█▎        | 131/1000 [01:54<12:26,  1.16it/s]


Trial 4, Episode 130, Average Reward: -60.40


Trial 4:  14%|█▍        | 141/1000 [02:03<12:14,  1.17it/s]


Trial 4, Episode 140, Average Reward: 8.08


Trial 4:  15%|█▌        | 151/1000 [02:11<12:13,  1.16it/s]


Trial 4, Episode 150, Average Reward: 72.69


Trial 4:  16%|█▌        | 161/1000 [02:20<12:13,  1.14it/s]


Trial 4, Episode 160, Average Reward: 139.96


Trial 4:  17%|█▋        | 171/1000 [02:29<11:53,  1.16it/s]


Trial 4, Episode 170, Average Reward: 202.28


Trial 4:  18%|█▊        | 181/1000 [02:37<11:53,  1.15it/s]


Trial 4, Episode 180, Average Reward: 256.59


Trial 4:  19%|█▉        | 191/1000 [02:46<11:44,  1.15it/s]


Trial 4, Episode 190, Average Reward: 306.38


Trial 4:  20%|██        | 201/1000 [02:55<11:36,  1.15it/s]


Trial 4, Episode 200, Average Reward: 347.15


Trial 4:  21%|██        | 211/1000 [03:04<11:18,  1.16it/s]


Trial 4, Episode 210, Average Reward: 380.16


Trial 4:  22%|██▏       | 221/1000 [03:12<11:10,  1.16it/s]


Trial 4, Episode 220, Average Reward: 407.30


Trial 4:  23%|██▎       | 231/1000 [03:21<11:01,  1.16it/s]


Trial 4, Episode 230, Average Reward: 438.88


Trial 4:  24%|██▍       | 241/1000 [03:30<10:53,  1.16it/s]


Trial 4, Episode 240, Average Reward: 471.32


Trial 4:  25%|██▌       | 251/1000 [03:38<10:48,  1.16it/s]


Trial 4, Episode 250, Average Reward: 498.87


Trial 4:  26%|██▌       | 261/1000 [03:47<11:03,  1.11it/s]


Trial 4, Episode 260, Average Reward: 523.86


Trial 4:  27%|██▋       | 271/1000 [03:56<10:36,  1.15it/s]


Trial 4, Episode 270, Average Reward: 547.99


Trial 4:  28%|██▊       | 281/1000 [04:04<10:20,  1.16it/s]


Trial 4, Episode 280, Average Reward: 564.73


Trial 4:  29%|██▉       | 291/1000 [04:13<10:09,  1.16it/s]


Trial 4, Episode 290, Average Reward: 575.35


Trial 4:  30%|███       | 301/1000 [04:21<10:09,  1.15it/s]


Trial 4, Episode 300, Average Reward: 585.25


Trial 4:  31%|███       | 311/1000 [04:30<09:54,  1.16it/s]


Trial 4, Episode 310, Average Reward: 603.41


Trial 4:  32%|███▏      | 321/1000 [04:39<09:42,  1.17it/s]


Trial 4, Episode 320, Average Reward: 634.61


Trial 4:  33%|███▎      | 331/1000 [04:47<09:37,  1.16it/s]


Trial 4, Episode 330, Average Reward: 649.89


Trial 4:  34%|███▍      | 341/1000 [04:56<09:40,  1.14it/s]


Trial 4, Episode 340, Average Reward: 657.81


Trial 4:  35%|███▌      | 351/1000 [05:05<09:21,  1.16it/s]


Trial 4, Episode 350, Average Reward: 670.33


Trial 4:  36%|███▌      | 361/1000 [05:13<09:15,  1.15it/s]


Trial 4, Episode 360, Average Reward: 688.16


Trial 4:  37%|███▋      | 371/1000 [05:22<09:04,  1.16it/s]


Trial 4, Episode 370, Average Reward: 687.39


Trial 4:  38%|███▊      | 381/1000 [05:31<08:58,  1.15it/s]


Trial 4, Episode 380, Average Reward: 703.17


Trial 4:  39%|███▉      | 391/1000 [05:39<08:43,  1.16it/s]


Trial 4, Episode 390, Average Reward: 723.54


Trial 4:  40%|████      | 401/1000 [05:48<08:36,  1.16it/s]


Trial 4, Episode 400, Average Reward: 745.48


Trial 4:  41%|████      | 411/1000 [05:57<08:26,  1.16it/s]


Trial 4, Episode 410, Average Reward: 769.01


Trial 4:  42%|████▏     | 421/1000 [06:05<08:23,  1.15it/s]


Trial 4, Episode 420, Average Reward: 771.69


Trial 4:  43%|████▎     | 431/1000 [06:14<08:15,  1.15it/s]


Trial 4, Episode 430, Average Reward: 786.65


Trial 4:  44%|████▍     | 441/1000 [06:23<08:01,  1.16it/s]


Trial 4, Episode 440, Average Reward: 805.77


Trial 4:  45%|████▌     | 451/1000 [06:31<07:56,  1.15it/s]


Trial 4, Episode 450, Average Reward: 827.41


Trial 4:  46%|████▌     | 461/1000 [06:40<07:42,  1.17it/s]


Trial 4, Episode 460, Average Reward: 847.30


Trial 4:  47%|████▋     | 471/1000 [06:49<07:35,  1.16it/s]


Trial 4, Episode 470, Average Reward: 870.43


Trial 4:  48%|████▊     | 481/1000 [06:57<07:36,  1.14it/s]


Trial 4, Episode 480, Average Reward: 885.63


Trial 4:  49%|████▉     | 491/1000 [07:06<07:20,  1.16it/s]


Trial 4, Episode 490, Average Reward: 905.96


Trial 4:  50%|█████     | 501/1000 [07:15<07:08,  1.16it/s]


Trial 4, Episode 500, Average Reward: 924.09


Trial 4:  51%|█████     | 511/1000 [07:23<07:09,  1.14it/s]


Trial 4, Episode 510, Average Reward: 922.76


Trial 4:  52%|█████▏    | 521/1000 [07:32<07:07,  1.12it/s]


Trial 4, Episode 520, Average Reward: 936.10


Trial 4:  53%|█████▎    | 531/1000 [07:41<06:48,  1.15it/s]


Trial 4, Episode 530, Average Reward: 941.70


Trial 4:  54%|█████▍    | 541/1000 [07:49<06:34,  1.16it/s]


Trial 4, Episode 540, Average Reward: 948.57


Trial 4:  55%|█████▌    | 551/1000 [07:58<06:23,  1.17it/s]


Trial 4, Episode 550, Average Reward: 951.43


Trial 4:  56%|█████▌    | 561/1000 [08:07<06:20,  1.15it/s]


Trial 4, Episode 560, Average Reward: 946.72


Trial 4:  57%|█████▋    | 571/1000 [08:15<06:09,  1.16it/s]


Trial 4, Episode 570, Average Reward: 954.58


Trial 4:  58%|█████▊    | 581/1000 [08:24<05:57,  1.17it/s]


Trial 4, Episode 580, Average Reward: 958.87


Trial 4:  59%|█████▉    | 591/1000 [08:33<06:00,  1.14it/s]


Trial 4, Episode 590, Average Reward: 957.67


Trial 4:  60%|██████    | 601/1000 [08:41<05:46,  1.15it/s]


Trial 4, Episode 600, Average Reward: 956.25


Trial 4:  61%|██████    | 611/1000 [08:50<05:36,  1.15it/s]


Trial 4, Episode 610, Average Reward: 960.24


Trial 4:  62%|██████▏   | 621/1000 [08:59<05:36,  1.13it/s]


Trial 4, Episode 620, Average Reward: 953.54


Trial 4:  63%|██████▎   | 631/1000 [09:07<05:20,  1.15it/s]


Trial 4, Episode 630, Average Reward: 956.89


Trial 4:  64%|██████▍   | 641/1000 [09:16<05:10,  1.16it/s]


Trial 4, Episode 640, Average Reward: 958.14


Trial 4:  65%|██████▌   | 651/1000 [09:25<05:11,  1.12it/s]


Trial 4, Episode 650, Average Reward: 949.11


Trial 4:  66%|██████▌   | 661/1000 [09:33<04:50,  1.17it/s]


Trial 4, Episode 660, Average Reward: 956.84


Trial 4:  67%|██████▋   | 671/1000 [09:42<04:42,  1.16it/s]


Trial 4, Episode 670, Average Reward: 956.15


Trial 4:  68%|██████▊   | 681/1000 [09:51<04:36,  1.15it/s]


Trial 4, Episode 680, Average Reward: 956.45


Trial 4:  69%|██████▉   | 691/1000 [09:59<04:27,  1.16it/s]


Trial 4, Episode 690, Average Reward: 952.31


Trial 4:  70%|███████   | 701/1000 [10:08<04:19,  1.15it/s]


Trial 4, Episode 700, Average Reward: 955.18


Trial 4:  71%|███████   | 711/1000 [10:17<04:09,  1.16it/s]


Trial 4, Episode 710, Average Reward: 967.54


Trial 4:  72%|███████▏  | 721/1000 [10:25<04:03,  1.15it/s]


Trial 4, Episode 720, Average Reward: 982.81


Trial 4:  73%|███████▎  | 731/1000 [10:34<03:54,  1.15it/s]


Trial 4, Episode 730, Average Reward: 993.65


Trial 4:  74%|███████▍  | 741/1000 [10:43<03:45,  1.15it/s]


Trial 4, Episode 740, Average Reward: 1005.25


Trial 4:  75%|███████▌  | 751/1000 [10:51<03:33,  1.17it/s]


Trial 4, Episode 750, Average Reward: 1029.37


Trial 4:  76%|███████▌  | 761/1000 [11:00<03:25,  1.16it/s]


Trial 4, Episode 760, Average Reward: 1037.31


Trial 4:  77%|███████▋  | 771/1000 [11:09<03:18,  1.15it/s]


Trial 4, Episode 770, Average Reward: 1048.06


Trial 4:  78%|███████▊  | 781/1000 [11:17<03:11,  1.14it/s]


Trial 4, Episode 780, Average Reward: 1047.16


Trial 4:  79%|███████▉  | 791/1000 [11:26<03:00,  1.16it/s]


Trial 4, Episode 790, Average Reward: 1049.41


Trial 4:  80%|████████  | 801/1000 [11:35<02:52,  1.16it/s]


Trial 4, Episode 800, Average Reward: 1048.79


Trial 4:  81%|████████  | 811/1000 [11:43<02:45,  1.14it/s]


Trial 4, Episode 810, Average Reward: 1040.33


Trial 4:  82%|████████▏ | 821/1000 [11:52<02:33,  1.17it/s]


Trial 4, Episode 820, Average Reward: 1043.10


Trial 4:  83%|████████▎ | 831/1000 [12:01<02:25,  1.16it/s]


Trial 4, Episode 830, Average Reward: 1037.42


Trial 4:  84%|████████▍ | 841/1000 [12:09<02:19,  1.14it/s]


Trial 4, Episode 840, Average Reward: 1024.18


Trial 4:  85%|████████▌ | 851/1000 [12:18<02:09,  1.15it/s]


Trial 4, Episode 850, Average Reward: 1018.95


Trial 4:  86%|████████▌ | 861/1000 [12:27<01:59,  1.16it/s]


Trial 4, Episode 860, Average Reward: 1023.79


Trial 4:  87%|████████▋ | 871/1000 [12:35<01:51,  1.16it/s]


Trial 4, Episode 870, Average Reward: 1023.20


Trial 4:  88%|████████▊ | 881/1000 [12:44<01:41,  1.17it/s]


Trial 4, Episode 880, Average Reward: 1038.95


Trial 4:  89%|████████▉ | 891/1000 [12:53<01:33,  1.16it/s]


Trial 4, Episode 890, Average Reward: 1048.15


Trial 4:  90%|█████████ | 901/1000 [13:01<01:25,  1.16it/s]


Trial 4, Episode 900, Average Reward: 1045.61


Trial 4:  91%|█████████ | 911/1000 [13:10<01:16,  1.17it/s]


Trial 4, Episode 910, Average Reward: 1051.18


Trial 4:  92%|█████████▏| 921/1000 [13:19<01:09,  1.14it/s]


Trial 4, Episode 920, Average Reward: 1056.79


Trial 4:  93%|█████████▎| 931/1000 [13:27<00:59,  1.16it/s]


Trial 4, Episode 930, Average Reward: 1054.66


Trial 4:  94%|█████████▍| 941/1000 [13:36<00:50,  1.18it/s]


Trial 4, Episode 940, Average Reward: 1061.01


Trial 4:  95%|█████████▌| 951/1000 [13:45<00:42,  1.16it/s]


Trial 4, Episode 950, Average Reward: 1066.91


Trial 4:  96%|█████████▌| 961/1000 [13:53<00:33,  1.16it/s]


Trial 4, Episode 960, Average Reward: 1060.72


Trial 4:  97%|█████████▋| 971/1000 [14:02<00:26,  1.11it/s]


Trial 4, Episode 970, Average Reward: 1049.84


Trial 4:  98%|█████████▊| 981/1000 [14:11<00:16,  1.17it/s]


Trial 4, Episode 980, Average Reward: 1040.38


Trial 4:  99%|█████████▉| 991/1000 [14:20<00:08,  1.12it/s]


Trial 4, Episode 990, Average Reward: 1039.62


Trial 4: 100%|██████████| 1000/1000 [14:27<00:00,  1.15it/s]


Video saved as 'ppo_halfcheetah_trial_4.mp4'

Starting Trial 5/5


Trial 5:   0%|          | 1/1000 [00:00<16:11,  1.03it/s]


Trial 5, Episode 0, Average Reward: -451.77


Trial 5:   1%|          | 11/1000 [00:09<14:18,  1.15it/s]


Trial 5, Episode 10, Average Reward: -228.97


Trial 5:   2%|▏         | 21/1000 [00:18<14:13,  1.15it/s]


Trial 5, Episode 20, Average Reward: -93.14


Trial 5:   3%|▎         | 31/1000 [00:27<13:56,  1.16it/s]


Trial 5, Episode 30, Average Reward: -73.84


Trial 5:   4%|▍         | 41/1000 [00:35<13:51,  1.15it/s]


Trial 5, Episode 40, Average Reward: -20.73


Trial 5:   5%|▌         | 51/1000 [00:44<13:49,  1.14it/s]


Trial 5, Episode 50, Average Reward: -52.00


Trial 5:   6%|▌         | 61/1000 [00:53<13:36,  1.15it/s]


Trial 5, Episode 60, Average Reward: -66.20


Trial 5:   7%|▋         | 71/1000 [01:02<13:16,  1.17it/s]


Trial 5, Episode 70, Average Reward: -39.27


Trial 5:   8%|▊         | 81/1000 [01:10<13:11,  1.16it/s]


Trial 5, Episode 80, Average Reward: -55.09


Trial 5:   9%|▉         | 91/1000 [01:19<13:08,  1.15it/s]


Trial 5, Episode 90, Average Reward: -26.99


Trial 5:  10%|█         | 101/1000 [01:28<12:51,  1.16it/s]


Trial 5, Episode 100, Average Reward: -17.25


Trial 5:  11%|█         | 111/1000 [01:36<12:39,  1.17it/s]


Trial 5, Episode 110, Average Reward: 37.91


Trial 5:  12%|█▏        | 121/1000 [01:45<12:40,  1.16it/s]


Trial 5, Episode 120, Average Reward: 28.19


Trial 5:  13%|█▎        | 131/1000 [01:54<12:45,  1.14it/s]


Trial 5, Episode 130, Average Reward: 18.24


Trial 5:  14%|█▍        | 141/1000 [02:02<12:19,  1.16it/s]


Trial 5, Episode 140, Average Reward: 17.69


Trial 5:  15%|█▌        | 151/1000 [02:11<12:14,  1.16it/s]


Trial 5, Episode 150, Average Reward: 80.36


Trial 5:  16%|█▌        | 161/1000 [02:20<12:06,  1.15it/s]


Trial 5, Episode 160, Average Reward: 115.45


Trial 5:  17%|█▋        | 171/1000 [02:28<12:21,  1.12it/s]


Trial 5, Episode 170, Average Reward: 168.97


Trial 5:  18%|█▊        | 181/1000 [02:37<11:55,  1.14it/s]


Trial 5, Episode 180, Average Reward: 189.58


Trial 5:  19%|█▉        | 191/1000 [02:46<11:47,  1.14it/s]


Trial 5, Episode 190, Average Reward: 216.72


Trial 5:  20%|██        | 201/1000 [02:54<11:21,  1.17it/s]


Trial 5, Episode 200, Average Reward: 261.37


Trial 5:  21%|██        | 211/1000 [03:03<11:29,  1.14it/s]


Trial 5, Episode 210, Average Reward: 273.08


Trial 5:  22%|██▏       | 221/1000 [03:12<11:14,  1.15it/s]


Trial 5, Episode 220, Average Reward: 338.00


Trial 5:  23%|██▎       | 231/1000 [03:21<11:05,  1.16it/s]


Trial 5, Episode 230, Average Reward: 425.82


Trial 5:  24%|██▍       | 241/1000 [03:29<11:22,  1.11it/s]


Trial 5, Episode 240, Average Reward: 502.37


Trial 5:  25%|██▌       | 251/1000 [03:38<10:50,  1.15it/s]


Trial 5, Episode 250, Average Reward: 510.09


Trial 5:  26%|██▌       | 261/1000 [03:47<10:43,  1.15it/s]


Trial 5, Episode 260, Average Reward: 594.37


Trial 5:  27%|██▋       | 271/1000 [03:55<10:55,  1.11it/s]


Trial 5, Episode 270, Average Reward: 643.30


Trial 5:  28%|██▊       | 281/1000 [04:04<10:23,  1.15it/s]


Trial 5, Episode 280, Average Reward: 685.62


Trial 5:  29%|██▉       | 291/1000 [04:13<10:10,  1.16it/s]


Trial 5, Episode 290, Average Reward: 683.57


Trial 5:  30%|███       | 301/1000 [04:22<10:18,  1.13it/s]


Trial 5, Episode 300, Average Reward: 693.71


Trial 5:  31%|███       | 311/1000 [04:30<09:50,  1.17it/s]


Trial 5, Episode 310, Average Reward: 707.38


Trial 5:  32%|███▏      | 321/1000 [04:39<09:40,  1.17it/s]


Trial 5, Episode 320, Average Reward: 739.89


Trial 5:  33%|███▎      | 331/1000 [04:48<09:39,  1.16it/s]


Trial 5, Episode 330, Average Reward: 755.31


Trial 5:  34%|███▍      | 341/1000 [04:56<09:26,  1.16it/s]


Trial 5, Episode 340, Average Reward: 713.37


Trial 5:  35%|███▌      | 351/1000 [05:05<09:17,  1.16it/s]


Trial 5, Episode 350, Average Reward: 765.55


Trial 5:  36%|███▌      | 361/1000 [05:13<09:08,  1.17it/s]


Trial 5, Episode 360, Average Reward: 759.66


Trial 5:  37%|███▋      | 371/1000 [05:22<09:04,  1.16it/s]


Trial 5, Episode 370, Average Reward: 692.47


Trial 5:  38%|███▊      | 381/1000 [05:31<08:49,  1.17it/s]


Trial 5, Episode 380, Average Reward: 755.72


Trial 5:  39%|███▉      | 391/1000 [05:39<08:55,  1.14it/s]


Trial 5, Episode 390, Average Reward: 776.76


Trial 5:  40%|████      | 401/1000 [05:48<08:37,  1.16it/s]


Trial 5, Episode 400, Average Reward: 767.25


Trial 5:  41%|████      | 411/1000 [05:57<08:30,  1.15it/s]


Trial 5, Episode 410, Average Reward: 784.57


Trial 5:  42%|████▏     | 421/1000 [06:05<08:13,  1.17it/s]


Trial 5, Episode 420, Average Reward: 787.15


Trial 5:  43%|████▎     | 431/1000 [06:14<08:15,  1.15it/s]


Trial 5, Episode 430, Average Reward: 800.70


Trial 5:  44%|████▍     | 441/1000 [06:23<08:06,  1.15it/s]


Trial 5, Episode 440, Average Reward: 852.05


Trial 5:  45%|████▌     | 451/1000 [06:32<07:56,  1.15it/s]


Trial 5, Episode 450, Average Reward: 867.82


Trial 5:  46%|████▌     | 461/1000 [06:40<07:50,  1.15it/s]


Trial 5, Episode 460, Average Reward: 929.60


Trial 5:  47%|████▋     | 471/1000 [06:49<07:37,  1.16it/s]


Trial 5, Episode 470, Average Reward: 976.25


Trial 5:  48%|████▊     | 481/1000 [06:58<07:26,  1.16it/s]


Trial 5, Episode 480, Average Reward: 997.27


Trial 5:  49%|████▉     | 491/1000 [07:06<07:25,  1.14it/s]


Trial 5, Episode 490, Average Reward: 1013.80


Trial 5:  50%|█████     | 501/1000 [07:15<07:13,  1.15it/s]


Trial 5, Episode 500, Average Reward: 1009.39


Trial 5:  51%|█████     | 511/1000 [07:24<07:05,  1.15it/s]


Trial 5, Episode 510, Average Reward: 1059.22


Trial 5:  52%|█████▏    | 521/1000 [07:33<07:00,  1.14it/s]


Trial 5, Episode 520, Average Reward: 1090.07


Trial 5:  53%|█████▎    | 531/1000 [07:41<06:47,  1.15it/s]


Trial 5, Episode 530, Average Reward: 1101.15


Trial 5:  54%|█████▍    | 541/1000 [07:50<06:36,  1.16it/s]


Trial 5, Episode 540, Average Reward: 1103.95


Trial 5:  55%|█████▌    | 551/1000 [07:59<06:28,  1.16it/s]


Trial 5, Episode 550, Average Reward: 1095.98


Trial 5:  56%|█████▌    | 561/1000 [08:07<06:19,  1.16it/s]


Trial 5, Episode 560, Average Reward: 1022.89


Trial 5:  57%|█████▋    | 571/1000 [08:16<06:17,  1.14it/s]


Trial 5, Episode 570, Average Reward: 1044.40


Trial 5:  58%|█████▊    | 581/1000 [08:25<05:58,  1.17it/s]


Trial 5, Episode 580, Average Reward: 1075.44


Trial 5:  59%|█████▉    | 591/1000 [08:33<05:52,  1.16it/s]


Trial 5, Episode 590, Average Reward: 1124.51


Trial 5:  60%|██████    | 601/1000 [08:42<05:39,  1.18it/s]


Trial 5, Episode 600, Average Reward: 1333.42


Trial 5:  61%|██████    | 611/1000 [08:51<05:38,  1.15it/s]


Trial 5, Episode 610, Average Reward: 1332.79


Trial 5:  62%|██████▏   | 621/1000 [08:59<05:29,  1.15it/s]


Trial 5, Episode 620, Average Reward: 1333.17


Trial 5:  63%|██████▎   | 631/1000 [09:08<05:19,  1.15it/s]


Trial 5, Episode 630, Average Reward: 1376.08


Trial 5:  64%|██████▍   | 641/1000 [09:17<05:27,  1.10it/s]


Trial 5, Episode 640, Average Reward: 1449.41


Trial 5:  65%|██████▌   | 651/1000 [09:25<04:59,  1.16it/s]


Trial 5, Episode 650, Average Reward: 1499.12


Trial 5:  66%|██████▌   | 661/1000 [09:34<04:54,  1.15it/s]


Trial 5, Episode 660, Average Reward: 1610.97


Trial 5:  67%|██████▋   | 671/1000 [09:43<04:43,  1.16it/s]


Trial 5, Episode 670, Average Reward: 1652.34


Trial 5:  68%|██████▊   | 681/1000 [09:51<04:35,  1.16it/s]


Trial 5, Episode 680, Average Reward: 1754.15


Trial 5:  69%|██████▉   | 691/1000 [10:00<04:25,  1.17it/s]


Trial 5, Episode 690, Average Reward: 1786.40


Trial 5:  70%|███████   | 701/1000 [10:09<04:17,  1.16it/s]


Trial 5, Episode 700, Average Reward: 1786.96


Trial 5:  71%|███████   | 711/1000 [10:17<04:12,  1.15it/s]


Trial 5, Episode 710, Average Reward: 1818.07


Trial 5:  72%|███████▏  | 721/1000 [10:26<04:01,  1.15it/s]


Trial 5, Episode 720, Average Reward: 1858.06


Trial 5:  73%|███████▎  | 731/1000 [10:35<03:50,  1.17it/s]


Trial 5, Episode 730, Average Reward: 1889.50


Trial 5:  74%|███████▍  | 741/1000 [10:43<03:45,  1.15it/s]


Trial 5, Episode 740, Average Reward: 1905.41


Trial 5:  75%|███████▌  | 751/1000 [10:52<03:37,  1.15it/s]


Trial 5, Episode 750, Average Reward: 1965.65


Trial 5:  76%|███████▌  | 761/1000 [11:01<03:23,  1.17it/s]


Trial 5, Episode 760, Average Reward: 1992.02


Trial 5:  77%|███████▋  | 771/1000 [11:09<03:15,  1.17it/s]


Trial 5, Episode 770, Average Reward: 2025.40


Trial 5:  78%|███████▊  | 781/1000 [11:18<03:15,  1.12it/s]


Trial 5, Episode 780, Average Reward: 1988.12


Trial 5:  79%|███████▉  | 791/1000 [11:27<02:59,  1.16it/s]


Trial 5, Episode 790, Average Reward: 2059.64


Trial 5:  80%|████████  | 801/1000 [11:35<02:54,  1.14it/s]


Trial 5, Episode 800, Average Reward: 2021.81


Trial 5:  81%|████████  | 811/1000 [11:44<02:43,  1.16it/s]


Trial 5, Episode 810, Average Reward: 1971.30


Trial 5:  82%|████████▏ | 821/1000 [11:53<02:35,  1.15it/s]


Trial 5, Episode 820, Average Reward: 1909.57


Trial 5:  83%|████████▎ | 831/1000 [12:02<02:26,  1.16it/s]


Trial 5, Episode 830, Average Reward: 1931.70


Trial 5:  84%|████████▍ | 841/1000 [12:10<02:16,  1.17it/s]


Trial 5, Episode 840, Average Reward: 1838.16


Trial 5:  85%|████████▌ | 851/1000 [12:19<02:09,  1.15it/s]


Trial 5, Episode 850, Average Reward: 1783.77


Trial 5:  86%|████████▌ | 861/1000 [12:27<02:00,  1.15it/s]


Trial 5, Episode 860, Average Reward: 1760.45


Trial 5:  87%|████████▋ | 871/1000 [12:36<01:51,  1.15it/s]


Trial 5, Episode 870, Average Reward: 1734.15


Trial 5:  88%|████████▊ | 881/1000 [12:45<01:42,  1.16it/s]


Trial 5, Episode 880, Average Reward: 1674.36


Trial 5:  89%|████████▉ | 891/1000 [12:53<01:35,  1.14it/s]


Trial 5, Episode 890, Average Reward: 1602.43


Trial 5:  90%|█████████ | 901/1000 [13:02<01:25,  1.15it/s]


Trial 5, Episode 900, Average Reward: 1569.41


Trial 5:  91%|█████████ | 911/1000 [13:11<01:16,  1.16it/s]


Trial 5, Episode 910, Average Reward: 1762.32


Trial 5:  92%|█████████▏| 921/1000 [13:19<01:08,  1.16it/s]


Trial 5, Episode 920, Average Reward: 1842.77


Trial 5:  93%|█████████▎| 931/1000 [13:30<01:18,  1.13s/it]


Trial 5, Episode 930, Average Reward: 1808.57


Trial 5:  94%|█████████▍| 941/1000 [13:40<00:58,  1.01it/s]


Trial 5, Episode 940, Average Reward: 1936.12


Trial 5:  95%|█████████▌| 951/1000 [13:50<00:52,  1.07s/it]


Trial 5, Episode 950, Average Reward: 1928.10


Trial 5:  96%|█████████▌| 961/1000 [14:00<00:37,  1.03it/s]


Trial 5, Episode 960, Average Reward: 1939.41


Trial 5:  97%|█████████▋| 971/1000 [14:09<00:24,  1.16it/s]


Trial 5, Episode 970, Average Reward: 2045.50


Trial 5:  98%|█████████▊| 981/1000 [14:21<00:21,  1.11s/it]


Trial 5, Episode 980, Average Reward: 2075.00


Trial 5:  99%|█████████▉| 991/1000 [14:32<00:09,  1.00s/it]


Trial 5, Episode 990, Average Reward: 2211.04


Trial 5: 100%|██████████| 1000/1000 [14:42<00:00,  1.13it/s]


Video saved as 'ppo_halfcheetah_trial_5.mp4'

Training progress plot saved as 'ppo_training_progress.png'
