In [None]:
# Importing necessary modules
# NOTE: This code was built on top of the code from cheetah_PPO.py
 
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import gymnasium as gym
import imageio
import matplotlib.pyplot as plt
from collections import deque
from torch.distributions import MultivariateNormal
from tqdm import tqdm

class Actor(nn.Module):
    """Gaussian policy network with a fixed, state-independent covariance."""

    def __init__(self, state_dim, action_dim, action_std):
        """
        Initializes the Actor network with fixed action standard deviation.

        Args:
        - state_dim  : Dimension of the input state
        - action_dim : Dimension of the output action
        - action_std : Fixed standard deviation for the action distribution
        """
        super(Actor, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, action_dim),
        )
        # Fixed action std for exploration (could be made learnable if desired)
        self.action_std = action_std

    def forward(self, state):
        """
        Forward pass to compute action mean and covariance matrix.

        Args:
        - state: Input state tensor whose last dimension is state_dim

        Returns:
        - action_mean : Mean of the action distribution (output of the MLP)
        - cov_matrix  : Diagonal covariance of shape (1, action_dim, action_dim)
                        with action_std**2 on the diagonal
        """
        action_mean = self.fc(state)
        # Build the variance vector from action_mean so the covariance lives on
        # the same device and has the same dtype as the mean; torch.full would
        # always allocate a default-dtype CPU tensor.
        variances = action_mean.new_full((action_mean.size(-1),), self.action_std ** 2)
        # Leading singleton dimension lets the covariance broadcast over a batch.
        cov_matrix = torch.diag(variances).unsqueeze(0)
        return action_mean, cov_matrix

class Critic(nn.Module):
    """State-value approximator: a two-hidden-layer MLP with a scalar output."""

    def __init__(self, state_dim):
        """
        Builds the value network.

        Args:
        - state_dim: Dimension of the input state
        """
        super().__init__()
        hidden_units = 64
        layers = [
            nn.Linear(state_dim, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 1),
        ]
        # Attribute name and layer order are kept so parameter names
        # ("fc.0.weight", ...) stay the same.
        self.fc = nn.Sequential(*layers)

    def forward(self, state):
        """Maps a state tensor to its estimated value (last dimension of size 1)."""
        value_estimate = self.fc(state)
        return value_estimate

class PPO:
    """Container for the actor/critic networks, their optimizers and PPO hyperparameters."""

    def __init__(self, state_dim, action_dim, action_std, lr, gamma, eps_clip, lam, epochs, num_mini_batches):
        """
        Builds the networks and optimizers and stores the hyperparameters.

        Args:
        - state_dim        : Dimension of the state space
        - action_dim       : Dimension of the action space
        - action_std       : Standard deviation for action exploration
        - lr               : Learning rate shared by both Adam optimizers
        - gamma            : Discount factor for future rewards
        - eps_clip         : PPO clipping parameter
        - lam              : Lambda parameter for GAE
        - epochs           : Number of epochs for each PPO update
        - num_mini_batches : Number of minibatches for each update
        """
        # Networks (actor is constructed before the critic)
        self.actor = Actor(state_dim, action_dim, action_std)
        self.critic = Critic(state_dim)

        # One Adam optimizer per network, same learning rate
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=lr)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=lr)

        # Hyperparameters read by the rollout / update code
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.action_std = action_std
        self.lam = lam
        self.epochs = epochs
        self.num_mini_batches = num_mini_batches

    def select_action(self, state):
        """
        Samples an action for a single state from the current policy.

        Args:
        - state: A single (unbatched) observation

        Returns:
        - action         : Sampled action as a NumPy array (batch dimension removed)
        - action_logprob : Log probability of the sampled action (Python float)
        - value          : Critic's value estimate for the state (Python float)
        """
        observation = torch.FloatTensor(state).unsqueeze(0)
        # Acting never needs gradients.
        with torch.no_grad():
            mean, covariance = self.actor(observation)
            policy = MultivariateNormal(mean, covariance)
            sampled = policy.sample()
            log_prob = policy.log_prob(sampled).item()
            state_value = self.critic(observation).item()
        return sampled.squeeze(0).numpy(), log_prob, state_value

    def evaluate(self, states, actions):
        """
        Re-evaluates stored actions under the current policy and critic.

        Args:
        - states  : Batch of states
        - actions : Batch of actions taken in those states

        Returns:
        - action_logprobs : Log probabilities of the actions
        - state_values    : Critic outputs for the states
        - dist_entropy    : Entropy of the action distribution
        """
        mean, covariance = self.actor(states)
        policy = MultivariateNormal(mean, covariance)
        return policy.log_prob(actions), self.critic(states), policy.entropy()

def compute_gae(rewards, values, dones, gamma, lam):
    """
    Generalized Advantage Estimation via a single backward sweep.

    Args:
    - rewards : Per-timestep rewards (length T)
    - values  : Value estimates (length T+1; the last entry is the value of
                the state following the final transition)
    - dones   : Per-timestep done flags (1 stops bootstrapping and resets the
                accumulated advantage at that step)
    - gamma   : Discount factor
    - lam     : GAE lambda parameter

    Returns:
    - advantages : float32 NumPy array of GAE advantages (length T)
    - returns    : advantages + values[:-1]
    """
    horizon = len(rewards)
    advantages = np.zeros(horizon, dtype=np.float32)
    running_advantage = 0
    for t in range(horizon - 1, -1, -1):
        not_done = 1 - dones[t]
        # TD residual: r_t + gamma * V(s_{t+1}) - V(s_t), with no bootstrap past a done
        td_residual = rewards[t] + gamma * values[t + 1] * not_done - values[t]
        # A_t = delta_t + gamma * lambda * A_{t+1}, cut at episode boundaries
        running_advantage = td_residual + gamma * lam * not_done * running_advantage
        advantages[t] = running_advantage
    returns = advantages + values[:-1]
    return advantages, returns

def collect_rollout(env, agent, timesteps):
    """
    Collects a fixed number of transitions from the environment.

    The environment is reset at the start of the rollout and again whenever an
    episode ends, so one rollout may span several episodes.

    Args:
    - env       : Environment exposing reset(seed=...) and a five-value step(action)
    - agent     : Object exposing select_action(state) and a callable critic
    - timesteps : Number of environment steps to collect

    Returns:
    - states, actions, rewards, logprobs, dones : Per-step experience (length timesteps)
    - values : Per-step value estimates plus one extra entry for the state
               reached after the last step (length timesteps + 1)
    """
    states = []
    actions = []
    logprobs = []
    rewards = []
    dones = []
    values = []

    state, _ = env.reset(seed=None)

    for _ in range(timesteps):
        action, action_logprob, val = agent.select_action(state)
        next_state, reward, terminated, truncated, _ = env.step(action)
        # NOTE(review): truncation is treated like termination, so the GAE
        # computation will not bootstrap from the value of a time-limit state.
        done = terminated or truncated

        # Store transition data
        states.append(state)
        actions.append(action)
        logprobs.append(action_logprob)
        rewards.append(reward)
        dones.append(float(done))
        values.append(val)

        state = next_state
        if done:
            state, _ = env.reset(seed=None)

    # Bootstrap value for the state after the last transition. Only the scalar
    # is kept, so gradient tracking is disabled for this forward pass.
    with torch.no_grad():
        state_t = torch.FloatTensor(state).unsqueeze(0)
        next_value = agent.critic(state_t).item()
    values.append(next_value)

    return states, actions, rewards, logprobs, dones, values

def update(agent, states, actions, rewards, logprobs, dones, values, gamma, eps_clip):
    """
    Updates the policy and value networks using PPO with GAE and minibatch updates.

    Args:
    - agent    : PPO agent providing evaluate(), lam, epochs, num_mini_batches
                 and the two optimizers
    - states   : Sequence of collected states (length T)
    - actions  : Sequence of collected actions (length T)
    - rewards  : Sequence of rewards (length T)
    - logprobs : Log probabilities of the actions under the collecting policy
    - dones    : Done flags (length T)
    - values   : Value estimates (length T+1, includes the bootstrap value)
    - gamma    : Discount factor
    - eps_clip : PPO clipping parameter

    Returns:
    - None. The agent's networks are updated in place.
    """
    batch_size = len(states)
    if batch_size == 0:
        # Nothing to learn from; avoids a zero step size in the minibatch loop.
        return

    # Stack through NumPy first: building a tensor directly from a list of
    # ndarrays is a slow element-by-element path.
    states = torch.as_tensor(np.asarray(states), dtype=torch.float32)
    actions = torch.as_tensor(np.asarray(actions), dtype=torch.float32)
    old_logprobs = torch.as_tensor(np.asarray(logprobs), dtype=torch.float32)

    # Compute advantages using GAE
    advantages, returns = compute_gae(rewards, values, dones, gamma, agent.lam)
    advantages = torch.as_tensor(advantages, dtype=torch.float32)
    returns = torch.as_tensor(returns, dtype=torch.float32)

    # Normalize advantages for training stability. The sample standard
    # deviation of a single element is NaN, so skip it in that case.
    if batch_size > 1:
        advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

    # At least one sample per minibatch, even when the batch is smaller than
    # the requested number of minibatches.
    mini_batch_size = max(1, batch_size // agent.num_mini_batches)
    value_criterion = nn.MSELoss()

    for _ in range(agent.epochs):
        # Shuffle data for each epoch
        indices = np.arange(batch_size)
        np.random.shuffle(indices)

        # Update in minibatches (the last one may be shorter)
        for start in range(0, batch_size, mini_batch_size):
            mb_idx = indices[start:start + mini_batch_size]

            mb_states = states[mb_idx]
            mb_actions = actions[mb_idx]
            mb_returns = returns[mb_idx]
            mb_advantages = advantages[mb_idx]
            mb_old_logprobs = old_logprobs[mb_idx]

            # Probability ratio between the current and the collecting policy
            new_logprobs, state_values, dist_entropy = agent.evaluate(mb_states, mb_actions)
            ratios = torch.exp(new_logprobs - mb_old_logprobs)

            # PPO policy loss with clipping
            surr1 = ratios * mb_advantages
            surr2 = torch.clamp(ratios, 1 - eps_clip, 1 + eps_clip) * mb_advantages
            policy_loss = -torch.min(surr1, surr2).mean()

            # squeeze(-1) drops only the trailing value dimension, so a
            # one-sample minibatch keeps shape (1,) and matches mb_returns.
            value_loss = value_criterion(state_values.squeeze(-1), mb_returns)
            loss = policy_loss + 0.5 * value_loss - 0.01 * dist_entropy.mean()

            # Gradient update
            agent.actor_optimizer.zero_grad()
            agent.critic_optimizer.zero_grad()
            loss.backward()
            agent.actor_optimizer.step()
            agent.critic_optimizer.step()

def _record_demo_video(env, agent, video_filename, max_steps=1000):
    """Runs one episode (at most max_steps steps) with the agent and saves every other frame as a video."""
    frames = []
    state, _ = env.reset()
    done = False
    steps = 0

    while not done and steps < max_steps:
        action, _, _ = agent.select_action(state)
        state, _, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        if steps % 2 == 0:  # Save every other frame
            frames.append(env.render())
        steps += 1

    imageio.mimsave(video_filename, frames, fps=30)


def run_single_trial(trial_num, env_name, num_trials=5):
    """
    Runs a single training trial and records a demonstration video.

    Args:
    - trial_num  : Zero-based trial index; also used as the random seed
    - env_name   : Name of the gymnasium environment
    - num_trials : Total number of trials, used only in the start banner

    Returns:
    - trial_rewards : One entry per training iteration: the sum of all rewards
                      in that iteration's rollout (a rollout can span several
                      environment episodes)
    - ppo           : The trained PPO agent
    """
    print(f"\nStarting Trial {trial_num + 1}/{num_trials} with environment {env_name}")

    env = gym.make(env_name, render_mode="rgb_array")
    try:
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_std = 0.5

        # Hyperparameters
        lr = 3e-4
        gamma = 0.99
        eps_clip = 0.2
        lam = 0.95      # Lambda for GAE
        epochs = 10     # PPO update epochs
        num_mini_batches = 32
        episodes = 1000
        timesteps_per_rollout = 2048

        # Set seeds for reproducibility. Seeding the environment once here
        # makes its later unseeded resets deterministic as well.
        torch.manual_seed(trial_num)
        np.random.seed(trial_num)
        env.reset(seed=trial_num)

        ppo = PPO(state_dim, action_dim, action_std, lr, gamma, eps_clip, lam, epochs, num_mini_batches)

        trial_rewards = []
        avg_rewards = deque(maxlen=100)  # running window for the progress log

        for episode in tqdm(range(episodes), desc=f"Trial {trial_num + 1}"):
            # Collect experience and update policy
            states, actions, rewards, logprobs, dones, values = collect_rollout(env, ppo, timesteps_per_rollout)
            update(ppo, states, actions, rewards, logprobs, dones, values, gamma, eps_clip)

            # Total reward collected over the whole rollout
            episode_reward = sum(rewards)
            trial_rewards.append(episode_reward)
            avg_rewards.append(episode_reward)

            if episode % 10 == 0:
                avg_reward = np.mean(avg_rewards)
                print(f"\nTrial {trial_num + 1}, Episode {episode}, Average Reward: {avg_reward:.2f}")

        # The video is best-effort: a rendering or encoding failure must not
        # discard the training results.
        try:
            video_filename = f"ppo_hopper_trial_{trial_num + 1}.mp4"
            _record_demo_video(env, ppo, video_filename)
            print(f"Video saved as '{video_filename}'")
        except Exception as e:
            print(f"Warning: Could not save video for trial {trial_num + 1}: {e}")
    finally:
        # Release the environment even if training raised.
        env.close()

    return trial_rewards, ppo

def plot_rewards_with_trials(all_trial_rewards, window=100):
    """
    Plots per-trial reward curves together with their smoothed mean and a
    standard-deviation band, and saves the figure to 'ppo_training_progress.png'.

    Args:
    - all_trial_rewards : List of reward sequences, one per trial
    - window            : Trailing window size for smoothing the mean curve
    """
    plt.figure(figsize=(10, 6))

    per_trial = np.array(all_trial_rewards)
    mean_curve = np.mean(per_trial, axis=0)
    std_curve = np.std(per_trial, axis=0)

    # Trailing moving average; early points average over however many values exist so far.
    smoothed_curve = np.array([
        np.mean(mean_curve[max(0, idx - window + 1):idx + 1])
        for idx in range(len(mean_curve))
    ])

    # Faint raw curve for every trial
    for single_trial in all_trial_rewards:
        plt.plot(single_trial, alpha=0.1, color='gray')

    # Smoothed mean with a band of one (unsmoothed) standard deviation on each side
    x_axis = range(len(mean_curve))
    lower_band = smoothed_curve - std_curve
    upper_band = smoothed_curve + std_curve
    plt.plot(x_axis, smoothed_curve, linewidth=2, color='blue', label='Mean Reward')
    plt.fill_between(x_axis, lower_band, upper_band,
                     color='blue', alpha=0.2, label='Standard Deviation')

    plt.xlabel('Episode')
    plt.ylabel('Average Return')
    plt.title('PPO Training Progress - Hopper-v4')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.savefig('ppo_training_progress.png', dpi=300, bbox_inches='tight')
    plt.close()

def main():
    """Trains five trials on Hopper-v4, saves each trial's rewards to .npy, and plots them all."""
    env_name = "Hopper-v4"
    num_trials = 5

    collected_rewards = []
    for trial_index in range(num_trials):
        rewards_for_trial, _agent = run_single_trial(trial_index, env_name)
        collected_rewards.append(rewards_for_trial)
        # Save each trial's rewards as soon as that trial finishes
        np.save(f'trial_rewards_{trial_index}.npy', rewards_for_trial)

    plot_rewards_with_trials(collected_rewards)
    print("\nTraining progress plot saved as 'ppo_training_progress.png'")

# Run the full multi-trial experiment only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()



Starting Trial 1/5 with environment Hopper-v4


Trial 1:   0%|          | 1/1000 [00:02<41:46,  2.51s/it]


Trial 1, Episode 0, Average Reward: 1368.45


Trial 1:   1%|          | 11/1000 [00:23<35:14,  2.14s/it]


Trial 1, Episode 10, Average Reward: 2785.30


Trial 1:   2%|▏         | 21/1000 [00:44<37:23,  2.29s/it]


Trial 1, Episode 20, Average Reward: 3633.27


Trial 1:   3%|▎         | 31/1000 [01:08<38:02,  2.36s/it]


Trial 1, Episode 30, Average Reward: 4006.33


Trial 1:   4%|▍         | 41/1000 [01:30<34:29,  2.16s/it]


Trial 1, Episode 40, Average Reward: 4273.70


Trial 1:   5%|▌         | 51/1000 [01:53<37:14,  2.35s/it]


Trial 1, Episode 50, Average Reward: 4508.51


Trial 1:   6%|▌         | 61/1000 [02:18<39:45,  2.54s/it]


Trial 1, Episode 60, Average Reward: 4697.08


Trial 1:   7%|▋         | 71/1000 [02:40<30:24,  1.96s/it]


Trial 1, Episode 70, Average Reward: 4839.48


Trial 1:   8%|▊         | 81/1000 [02:58<28:30,  1.86s/it]


Trial 1, Episode 80, Average Reward: 4950.29


Trial 1:   9%|▉         | 91/1000 [03:21<36:02,  2.38s/it]


Trial 1, Episode 90, Average Reward: 5032.53


Trial 1:  10%|█         | 101/1000 [03:45<36:10,  2.41s/it]


Trial 1, Episode 100, Average Reward: 5154.24


Trial 1:  11%|█         | 111/1000 [04:09<35:22,  2.39s/it]


Trial 1, Episode 110, Average Reward: 5455.11


Trial 1:  12%|█▏        | 121/1000 [04:34<36:38,  2.50s/it]


Trial 1, Episode 120, Average Reward: 5601.12


Trial 1:  13%|█▎        | 131/1000 [04:57<33:53,  2.34s/it]


Trial 1, Episode 130, Average Reward: 5732.49


Trial 1:  14%|█▍        | 141/1000 [05:21<34:10,  2.39s/it]


Trial 1, Episode 140, Average Reward: 5826.49


Trial 1:  15%|█▌        | 151/1000 [05:46<33:45,  2.39s/it]


Trial 1, Episode 150, Average Reward: 5891.47


Trial 1:  16%|█▌        | 161/1000 [06:09<32:37,  2.33s/it]


Trial 1, Episode 160, Average Reward: 5933.65


Trial 1:  17%|█▋        | 171/1000 [06:34<34:05,  2.47s/it]


Trial 1, Episode 170, Average Reward: 5980.62


Trial 1:  18%|█▊        | 181/1000 [06:57<31:47,  2.33s/it]


Trial 1, Episode 180, Average Reward: 6036.86


Trial 1:  19%|█▉        | 191/1000 [07:21<31:31,  2.34s/it]


Trial 1, Episode 190, Average Reward: 6102.81


Trial 1:  20%|██        | 201/1000 [07:45<31:13,  2.34s/it]


Trial 1, Episode 200, Average Reward: 6135.59


Trial 1:  21%|██        | 211/1000 [08:08<29:53,  2.27s/it]


Trial 1, Episode 210, Average Reward: 6170.30


Trial 1:  22%|██▏       | 221/1000 [08:33<35:19,  2.72s/it]


Trial 1, Episode 220, Average Reward: 6195.93


Trial 1:  23%|██▎       | 231/1000 [08:57<30:36,  2.39s/it]


Trial 1, Episode 230, Average Reward: 6225.90


Trial 1:  24%|██▍       | 241/1000 [09:20<29:48,  2.36s/it]


Trial 1, Episode 240, Average Reward: 6260.54


Trial 1:  25%|██▌       | 251/1000 [09:45<29:27,  2.36s/it]


Trial 1, Episode 250, Average Reward: 6295.12


Trial 1:  26%|██▌       | 261/1000 [10:08<29:01,  2.36s/it]


Trial 1, Episode 260, Average Reward: 6341.61


Trial 1:  27%|██▋       | 271/1000 [10:32<29:27,  2.43s/it]


Trial 1, Episode 270, Average Reward: 6375.09


Trial 1:  28%|██▊       | 281/1000 [10:56<28:11,  2.35s/it]


Trial 1, Episode 280, Average Reward: 6389.55


Trial 1:  29%|██▉       | 291/1000 [11:19<28:11,  2.39s/it]


Trial 1, Episode 290, Average Reward: 6409.46


Trial 1:  30%|███       | 301/1000 [11:43<27:31,  2.36s/it]


Trial 1, Episode 300, Average Reward: 6448.53


Trial 1:  31%|███       | 311/1000 [12:07<26:59,  2.35s/it]


Trial 1, Episode 310, Average Reward: 6491.77


Trial 1:  32%|███▏      | 321/1000 [12:31<27:40,  2.45s/it]


Trial 1, Episode 320, Average Reward: 6538.13


Trial 1:  33%|███▎      | 331/1000 [12:53<23:02,  2.07s/it]


Trial 1, Episode 330, Average Reward: 6569.98


Trial 1:  34%|███▍      | 341/1000 [13:12<20:21,  1.85s/it]


Trial 1, Episode 340, Average Reward: 6583.20


Trial 1:  35%|███▌      | 351/1000 [13:30<19:51,  1.84s/it]


Trial 1, Episode 350, Average Reward: 6593.58


Trial 1:  36%|███▌      | 361/1000 [13:49<19:38,  1.84s/it]


Trial 1, Episode 360, Average Reward: 6604.19


Trial 1:  37%|███▋      | 371/1000 [14:07<19:10,  1.83s/it]


Trial 1, Episode 370, Average Reward: 6614.68


Trial 1:  38%|███▊      | 381/1000 [14:26<19:59,  1.94s/it]


Trial 1, Episode 380, Average Reward: 6622.91


Trial 1:  39%|███▉      | 391/1000 [14:44<18:33,  1.83s/it]


Trial 1, Episode 390, Average Reward: 6631.62


Trial 1:  40%|████      | 401/1000 [15:03<18:28,  1.85s/it]


Trial 1, Episode 400, Average Reward: 6627.31


Trial 1:  41%|████      | 411/1000 [15:21<18:08,  1.85s/it]


Trial 1, Episode 410, Average Reward: 6618.37


Trial 1:  42%|████▏     | 421/1000 [15:42<17:43,  1.84s/it]


Trial 1, Episode 420, Average Reward: 6604.87


Trial 1:  43%|████▎     | 431/1000 [15:59<16:39,  1.76s/it]


Trial 1, Episode 430, Average Reward: 6597.61


Trial 1:  44%|████▍     | 441/1000 [16:16<16:08,  1.73s/it]


Trial 1, Episode 440, Average Reward: 6612.53


Trial 1:  45%|████▌     | 451/1000 [16:34<16:05,  1.76s/it]


Trial 1, Episode 450, Average Reward: 6625.55


Trial 1:  46%|████▌     | 461/1000 [16:51<15:30,  1.73s/it]


Trial 1, Episode 460, Average Reward: 6631.96


Trial 1:  47%|████▋     | 471/1000 [17:09<15:15,  1.73s/it]


Trial 1, Episode 470, Average Reward: 6636.30


Trial 1:  48%|████▊     | 481/1000 [17:27<16:30,  1.91s/it]


Trial 1, Episode 480, Average Reward: 6663.32


Trial 1:  49%|████▉     | 491/1000 [17:44<14:44,  1.74s/it]


Trial 1, Episode 490, Average Reward: 6666.83


Trial 1:  50%|█████     | 501/1000 [18:03<15:41,  1.89s/it]


Trial 1, Episode 500, Average Reward: 6690.18


Trial 1:  51%|█████     | 511/1000 [18:21<14:00,  1.72s/it]


Trial 1, Episode 510, Average Reward: 6712.33


Trial 1:  52%|█████▏    | 521/1000 [18:39<14:10,  1.78s/it]


Trial 1, Episode 520, Average Reward: 6740.18


Trial 1:  53%|█████▎    | 531/1000 [18:56<13:29,  1.73s/it]


Trial 1, Episode 530, Average Reward: 6770.70


Trial 1:  54%|█████▍    | 541/1000 [19:14<13:26,  1.76s/it]


Trial 1, Episode 540, Average Reward: 6801.11


Trial 1:  55%|█████▌    | 551/1000 [19:32<13:48,  1.84s/it]


Trial 1, Episode 550, Average Reward: 6825.89


Trial 1:  56%|█████▌    | 561/1000 [19:50<12:39,  1.73s/it]


Trial 1, Episode 560, Average Reward: 6840.82


Trial 1:  57%|█████▋    | 571/1000 [20:07<12:15,  1.72s/it]


Trial 1, Episode 570, Average Reward: 6866.77


Trial 1:  58%|█████▊    | 581/1000 [20:24<12:17,  1.76s/it]


Trial 1, Episode 580, Average Reward: 6872.20


Trial 1:  59%|█████▉    | 591/1000 [20:42<11:47,  1.73s/it]


Trial 1, Episode 590, Average Reward: 6897.22


Trial 1:  60%|██████    | 601/1000 [21:00<11:30,  1.73s/it]


Trial 1, Episode 600, Average Reward: 6903.77


Trial 1:  61%|██████    | 611/1000 [21:17<11:39,  1.80s/it]


Trial 1, Episode 610, Average Reward: 6911.73


Trial 1:  62%|██████▏   | 621/1000 [21:36<11:27,  1.81s/it]


Trial 1, Episode 620, Average Reward: 6920.81


Trial 1:  63%|██████▎   | 631/1000 [21:55<11:21,  1.85s/it]


Trial 1, Episode 630, Average Reward: 6918.53


Trial 1:  64%|██████▍   | 641/1000 [22:13<10:50,  1.81s/it]


Trial 1, Episode 640, Average Reward: 6912.33


Trial 1:  65%|██████▌   | 651/1000 [22:32<11:11,  1.92s/it]


Trial 1, Episode 650, Average Reward: 6909.27


Trial 1:  66%|██████▌   | 661/1000 [22:52<11:19,  2.00s/it]


Trial 1, Episode 660, Average Reward: 6919.51


Trial 1:  67%|██████▋   | 671/1000 [23:11<10:04,  1.84s/it]


Trial 1, Episode 670, Average Reward: 6923.11


Trial 1:  68%|██████▊   | 681/1000 [23:31<10:02,  1.89s/it]


Trial 1, Episode 680, Average Reward: 6938.80


Trial 1:  69%|██████▉   | 691/1000 [23:49<09:19,  1.81s/it]


Trial 1, Episode 690, Average Reward: 6938.29


Trial 1:  70%|███████   | 701/1000 [24:09<09:43,  1.95s/it]


Trial 1, Episode 700, Average Reward: 6946.07


Trial 1:  71%|███████   | 711/1000 [24:27<08:43,  1.81s/it]


Trial 1, Episode 710, Average Reward: 6959.71


Trial 1:  72%|███████▏  | 721/1000 [24:46<08:45,  1.88s/it]


Trial 1, Episode 720, Average Reward: 6962.22


Trial 1:  73%|███████▎  | 731/1000 [25:06<08:27,  1.89s/it]


Trial 1, Episode 730, Average Reward: 6977.29


Trial 1:  74%|███████▍  | 741/1000 [25:24<07:51,  1.82s/it]


Trial 1, Episode 740, Average Reward: 6990.55


Trial 1:  75%|███████▌  | 751/1000 [25:42<07:35,  1.83s/it]


Trial 1, Episode 750, Average Reward: 7003.71


Trial 1:  76%|███████▌  | 761/1000 [26:00<07:13,  1.81s/it]


Trial 1, Episode 760, Average Reward: 7016.89


Trial 1:  77%|███████▋  | 771/1000 [26:18<06:50,  1.79s/it]


Trial 1, Episode 770, Average Reward: 7030.24


Trial 1:  78%|███████▊  | 781/1000 [26:41<08:08,  2.23s/it]


Trial 1, Episode 780, Average Reward: 7035.86


Trial 1:  79%|███████▉  | 791/1000 [27:02<07:56,  2.28s/it]


Trial 1, Episode 790, Average Reward: 7038.27


Trial 1:  80%|████████  | 801/1000 [27:24<06:57,  2.10s/it]


Trial 1, Episode 800, Average Reward: 7050.27


Trial 1:  81%|████████  | 811/1000 [27:46<06:34,  2.09s/it]


Trial 1, Episode 810, Average Reward: 7042.53


Trial 1:  82%|████████▏ | 821/1000 [28:09<06:19,  2.12s/it]


Trial 1, Episode 820, Average Reward: 7049.54


Trial 1:  83%|████████▎ | 831/1000 [28:33<06:15,  2.22s/it]


Trial 1, Episode 830, Average Reward: 7054.55


Trial 1:  84%|████████▍ | 841/1000 [28:54<05:46,  2.18s/it]


Trial 1, Episode 840, Average Reward: 7040.07


Trial 1:  85%|████████▌ | 851/1000 [29:15<04:57,  2.00s/it]


Trial 1, Episode 850, Average Reward: 7036.91


Trial 1:  86%|████████▌ | 861/1000 [29:34<04:24,  1.90s/it]


Trial 1, Episode 860, Average Reward: 7031.91


Trial 1:  87%|████████▋ | 871/1000 [29:53<03:56,  1.84s/it]


Trial 1, Episode 870, Average Reward: 7030.60


Trial 1:  88%|████████▊ | 881/1000 [30:12<04:07,  2.08s/it]


Trial 1, Episode 880, Average Reward: 7026.32


Trial 1:  89%|████████▉ | 891/1000 [30:34<03:40,  2.02s/it]


Trial 1, Episode 890, Average Reward: 7047.04


Trial 1:  90%|█████████ | 901/1000 [30:53<03:16,  1.98s/it]


Trial 1, Episode 900, Average Reward: 7048.47


Trial 1:  91%|█████████ | 911/1000 [31:13<03:03,  2.06s/it]


Trial 1, Episode 910, Average Reward: 7060.53


Trial 1:  92%|█████████▏| 921/1000 [31:33<02:38,  2.00s/it]


Trial 1, Episode 920, Average Reward: 7055.21


Trial 1:  93%|█████████▎| 931/1000 [31:55<02:27,  2.14s/it]


Trial 1, Episode 930, Average Reward: 7042.84


Trial 1:  94%|█████████▍| 941/1000 [32:25<02:58,  3.02s/it]


Trial 1, Episode 940, Average Reward: 7061.24


Trial 1:  95%|█████████▌| 951/1000 [32:56<02:31,  3.10s/it]


Trial 1, Episode 950, Average Reward: 7069.25


Trial 1:  96%|█████████▌| 961/1000 [33:16<01:16,  1.96s/it]


Trial 1, Episode 960, Average Reward: 7070.38


Trial 1:  97%|█████████▋| 971/1000 [33:35<00:56,  1.95s/it]


Trial 1, Episode 970, Average Reward: 7065.32


Trial 1:  98%|█████████▊| 981/1000 [33:54<00:37,  1.95s/it]


Trial 1, Episode 980, Average Reward: 7075.20


Trial 1:  99%|█████████▉| 991/1000 [34:14<00:17,  1.90s/it]


Trial 1, Episode 990, Average Reward: 7028.30


Trial 1: 100%|██████████| 1000/1000 [34:31<00:00,  2.07s/it]


Video saved as 'ppo_hopper_trial_1.mp4'

Starting Trial 2/5 with environment Hopper-v4


Trial 2:   0%|          | 1/1000 [00:01<32:54,  1.98s/it]


Trial 2, Episode 0, Average Reward: 1693.08


Trial 2:   1%|          | 11/1000 [00:21<31:35,  1.92s/it]


Trial 2, Episode 10, Average Reward: 3578.67


Trial 2:   2%|▏         | 21/1000 [00:40<31:04,  1.90s/it]


Trial 2, Episode 20, Average Reward: 4068.49


Trial 2:   3%|▎         | 31/1000 [01:00<32:06,  1.99s/it]


Trial 2, Episode 30, Average Reward: 4334.00


Trial 2:   4%|▍         | 41/1000 [01:19<30:37,  1.92s/it]


Trial 2, Episode 40, Average Reward: 4533.82


Trial 2:   5%|▌         | 51/1000 [01:38<30:31,  1.93s/it]


Trial 2, Episode 50, Average Reward: 4736.63


Trial 2:   6%|▌         | 61/1000 [01:58<30:57,  1.98s/it]


Trial 2, Episode 60, Average Reward: 4929.64


Trial 2:   7%|▋         | 71/1000 [02:17<29:36,  1.91s/it]


Trial 2, Episode 70, Average Reward: 5109.08


Trial 2:   8%|▊         | 81/1000 [02:37<29:14,  1.91s/it]


Trial 2, Episode 80, Average Reward: 5241.57


Trial 2:   9%|▉         | 91/1000 [02:56<28:50,  1.90s/it]


Trial 2, Episode 90, Average Reward: 5317.06


Trial 2:  10%|█         | 101/1000 [03:15<28:45,  1.92s/it]


Trial 2, Episode 100, Average Reward: 5421.73


Trial 2:  11%|█         | 111/1000 [03:35<28:13,  1.90s/it]


Trial 2, Episode 110, Average Reward: 5687.30


Trial 2:  12%|█▏        | 121/1000 [03:54<29:55,  2.04s/it]


Trial 2, Episode 120, Average Reward: 5840.49


Trial 2:  13%|█▎        | 131/1000 [04:13<27:30,  1.90s/it]


Trial 2, Episode 130, Average Reward: 5966.55


Trial 2:  14%|█▍        | 141/1000 [04:32<27:23,  1.91s/it]


Trial 2, Episode 140, Average Reward: 6049.74


Trial 2:  15%|█▌        | 151/1000 [04:51<26:51,  1.90s/it]


Trial 2, Episode 150, Average Reward: 6069.27


Trial 2:  16%|█▌        | 161/1000 [05:11<26:37,  1.90s/it]


Trial 2, Episode 160, Average Reward: 6093.43


Trial 2:  17%|█▋        | 171/1000 [05:30<26:09,  1.89s/it]


Trial 2, Episode 170, Average Reward: 6098.30


Trial 2:  18%|█▊        | 181/1000 [05:49<25:52,  1.90s/it]


Trial 2, Episode 180, Average Reward: 6114.70


Trial 2:  19%|█▉        | 191/1000 [06:09<25:49,  1.92s/it]


Trial 2, Episode 190, Average Reward: 6169.31


Trial 2:  20%|██        | 201/1000 [06:28<25:18,  1.90s/it]


Trial 2, Episode 200, Average Reward: 6214.52


Trial 2:  21%|██        | 211/1000 [06:48<25:41,  1.95s/it]


Trial 2, Episode 210, Average Reward: 6218.24


Trial 2:  22%|██▏       | 221/1000 [07:08<25:36,  1.97s/it]


Trial 2, Episode 220, Average Reward: 6225.37


Trial 2:  23%|██▎       | 231/1000 [07:27<24:27,  1.91s/it]


Trial 2, Episode 230, Average Reward: 6249.39


Trial 2:  24%|██▍       | 241/1000 [07:46<24:05,  1.90s/it]


Trial 2, Episode 240, Average Reward: 6299.84


Trial 2:  25%|██▌       | 251/1000 [08:06<24:09,  1.93s/it]


Trial 2, Episode 250, Average Reward: 6354.21


Trial 2:  26%|██▌       | 261/1000 [08:25<23:26,  1.90s/it]


Trial 2, Episode 260, Average Reward: 6388.44


Trial 2:  27%|██▋       | 271/1000 [08:44<23:00,  1.89s/it]


Trial 2, Episode 270, Average Reward: 6399.53


Trial 2:  28%|██▊       | 281/1000 [09:04<23:04,  1.93s/it]


Trial 2, Episode 280, Average Reward: 6405.16


Trial 2:  29%|██▉       | 291/1000 [09:23<22:49,  1.93s/it]


Trial 2, Episode 290, Average Reward: 6404.45


Trial 2:  30%|███       | 301/1000 [09:42<22:10,  1.90s/it]


Trial 2, Episode 300, Average Reward: 6397.06


Trial 2:  31%|███       | 311/1000 [10:02<22:19,  1.94s/it]


Trial 2, Episode 310, Average Reward: 6385.76


Trial 2:  32%|███▏      | 321/1000 [10:21<21:57,  1.94s/it]


Trial 2, Episode 320, Average Reward: 6409.74


Trial 2:  33%|███▎      | 331/1000 [10:40<21:15,  1.91s/it]


Trial 2, Episode 330, Average Reward: 6417.03


Trial 2:  34%|███▍      | 341/1000 [11:00<21:37,  1.97s/it]


Trial 2, Episode 340, Average Reward: 6409.89


Trial 2:  35%|███▌      | 351/1000 [11:19<20:32,  1.90s/it]


Trial 2, Episode 350, Average Reward: 6424.05


Trial 2:  36%|███▌      | 361/1000 [11:38<20:15,  1.90s/it]


Trial 2, Episode 360, Average Reward: 6422.68


Trial 2:  37%|███▋      | 371/1000 [11:58<21:05,  2.01s/it]


Trial 2, Episode 370, Average Reward: 6437.31


Trial 2:  38%|███▊      | 381/1000 [12:18<19:48,  1.92s/it]


Trial 2, Episode 380, Average Reward: 6453.61


Trial 2:  39%|███▉      | 391/1000 [12:37<19:33,  1.93s/it]


Trial 2, Episode 390, Average Reward: 6447.30


Trial 2:  40%|████      | 401/1000 [12:56<20:04,  2.01s/it]


Trial 2, Episode 400, Average Reward: 6453.33


Trial 2:  41%|████      | 411/1000 [13:16<18:46,  1.91s/it]


Trial 2, Episode 410, Average Reward: 6481.14


Trial 2:  42%|████▏     | 421/1000 [13:35<18:31,  1.92s/it]


Trial 2, Episode 420, Average Reward: 6495.91


Trial 2:  43%|████▎     | 431/1000 [13:55<19:10,  2.02s/it]


Trial 2, Episode 430, Average Reward: 6509.70


Trial 2:  44%|████▍     | 441/1000 [14:14<17:50,  1.92s/it]


Trial 2, Episode 440, Average Reward: 6528.82


Trial 2:  45%|████▌     | 451/1000 [14:33<17:26,  1.91s/it]


Trial 2, Episode 450, Average Reward: 6546.11


Trial 2:  46%|████▌     | 461/1000 [14:52<17:05,  1.90s/it]


Trial 2, Episode 460, Average Reward: 6555.40


Trial 2:  47%|████▋     | 471/1000 [15:12<16:49,  1.91s/it]


Trial 2, Episode 470, Average Reward: 6571.35


Trial 2:  48%|████▊     | 481/1000 [15:31<16:35,  1.92s/it]


Trial 2, Episode 480, Average Reward: 6576.93


Trial 2:  49%|████▉     | 491/1000 [15:50<16:10,  1.91s/it]


Trial 2, Episode 490, Average Reward: 6596.72


Trial 2:  50%|█████     | 501/1000 [16:10<15:57,  1.92s/it]


Trial 2, Episode 500, Average Reward: 6617.36


Trial 2:  51%|█████     | 511/1000 [16:29<15:33,  1.91s/it]


Trial 2, Episode 510, Average Reward: 6621.18


Trial 2:  52%|█████▏    | 521/1000 [16:48<15:11,  1.90s/it]


Trial 2, Episode 520, Average Reward: 6627.72


Trial 2:  53%|█████▎    | 531/1000 [17:08<14:58,  1.92s/it]


Trial 2, Episode 530, Average Reward: 6633.25


Trial 2:  54%|█████▍    | 541/1000 [17:27<14:41,  1.92s/it]


Trial 2, Episode 540, Average Reward: 6621.77


Trial 2:  55%|█████▌    | 551/1000 [17:46<14:13,  1.90s/it]


Trial 2, Episode 550, Average Reward: 6611.76


Trial 2:  56%|█████▌    | 561/1000 [18:06<14:02,  1.92s/it]


Trial 2, Episode 560, Average Reward: 6605.83


Trial 2:  57%|█████▋    | 571/1000 [18:28<15:26,  2.16s/it]


Trial 2, Episode 570, Average Reward: 6600.38


Trial 2:  58%|█████▊    | 581/1000 [18:49<15:23,  2.20s/it]


Trial 2, Episode 580, Average Reward: 6585.48


Trial 2:  59%|█████▉    | 591/1000 [19:13<13:47,  2.02s/it]


Trial 2, Episode 590, Average Reward: 6590.54


Trial 2:  60%|██████    | 601/1000 [19:35<14:35,  2.19s/it]


Trial 2, Episode 600, Average Reward: 6590.30


Trial 2:  61%|██████    | 611/1000 [19:54<12:36,  1.95s/it]


Trial 2, Episode 610, Average Reward: 6583.45


Trial 2:  62%|██████▏   | 621/1000 [20:15<12:27,  1.97s/it]


Trial 2, Episode 620, Average Reward: 6582.72


Trial 2:  63%|██████▎   | 631/1000 [20:35<11:53,  1.93s/it]


Trial 2, Episode 630, Average Reward: 6589.23


Trial 2:  64%|██████▍   | 641/1000 [20:55<11:34,  1.93s/it]


Trial 2, Episode 640, Average Reward: 6612.47


Trial 2:  65%|██████▌   | 651/1000 [21:15<11:27,  1.97s/it]


Trial 2, Episode 650, Average Reward: 6638.01


Trial 2:  66%|██████▌   | 661/1000 [21:36<11:45,  2.08s/it]


Trial 2, Episode 660, Average Reward: 6658.11


Trial 2:  67%|██████▋   | 671/1000 [21:58<11:46,  2.15s/it]


Trial 2, Episode 670, Average Reward: 6664.36


Trial 2:  68%|██████▊   | 681/1000 [22:18<10:33,  1.98s/it]


Trial 2, Episode 680, Average Reward: 6685.88


Trial 2:  69%|██████▉   | 691/1000 [22:41<11:11,  2.17s/it]


Trial 2, Episode 690, Average Reward: 6704.22


Trial 2:  70%|███████   | 701/1000 [23:02<10:46,  2.16s/it]


Trial 2, Episode 700, Average Reward: 6712.13


Trial 2:  71%|███████   | 711/1000 [23:23<09:15,  1.92s/it]


Trial 2, Episode 710, Average Reward: 6739.31


Trial 2:  72%|███████▏  | 721/1000 [23:42<08:51,  1.90s/it]


Trial 2, Episode 720, Average Reward: 6754.52


Trial 2:  73%|███████▎  | 731/1000 [24:01<08:50,  1.97s/it]


Trial 2, Episode 730, Average Reward: 6753.51


Trial 2:  74%|███████▍  | 741/1000 [24:21<08:16,  1.92s/it]


Trial 2, Episode 740, Average Reward: 6754.65


Trial 2:  75%|███████▌  | 751/1000 [24:40<07:41,  1.85s/it]


Trial 2, Episode 750, Average Reward: 6756.07


Trial 2:  76%|███████▌  | 761/1000 [24:59<07:41,  1.93s/it]


Trial 2, Episode 760, Average Reward: 6762.58


Trial 2:  77%|███████▋  | 771/1000 [25:23<08:51,  2.32s/it]


Trial 2, Episode 770, Average Reward: 6782.43


Trial 2:  78%|███████▊  | 781/1000 [25:45<08:01,  2.20s/it]


Trial 2, Episode 780, Average Reward: 6781.17


Trial 2:  79%|███████▉  | 791/1000 [26:06<07:29,  2.15s/it]


Trial 2, Episode 790, Average Reward: 6779.78


Trial 2:  80%|████████  | 801/1000 [26:28<07:07,  2.15s/it]


Trial 2, Episode 800, Average Reward: 6778.75


Trial 2:  81%|████████  | 811/1000 [26:48<06:05,  1.93s/it]


Trial 2, Episode 810, Average Reward: 6770.88


Trial 2:  82%|████████▏ | 821/1000 [27:08<05:42,  1.91s/it]


Trial 2, Episode 820, Average Reward: 6771.51


Trial 2:  83%|████████▎ | 831/1000 [27:29<06:09,  2.19s/it]


Trial 2, Episode 830, Average Reward: 6752.98


Trial 2:  84%|████████▍ | 841/1000 [27:51<05:56,  2.24s/it]


Trial 2, Episode 840, Average Reward: 6759.29


Trial 2:  85%|████████▌ | 851/1000 [28:10<04:37,  1.86s/it]


Trial 2, Episode 850, Average Reward: 6747.72


Trial 2:  86%|████████▌ | 861/1000 [28:29<04:16,  1.85s/it]


Trial 2, Episode 860, Average Reward: 6714.45


Trial 2:  87%|████████▋ | 871/1000 [28:48<03:57,  1.84s/it]


Trial 2, Episode 870, Average Reward: 6705.36


Trial 2:  88%|████████▊ | 881/1000 [29:07<03:42,  1.87s/it]


Trial 2, Episode 880, Average Reward: 6723.89


Trial 2:  89%|████████▉ | 891/1000 [29:26<03:19,  1.83s/it]


Trial 2, Episode 890, Average Reward: 6720.53


Trial 2:  90%|█████████ | 901/1000 [29:44<03:00,  1.82s/it]


Trial 2, Episode 900, Average Reward: 6660.36


Trial 2:  91%|█████████ | 911/1000 [30:05<02:57,  2.00s/it]


Trial 2, Episode 910, Average Reward: 6634.88


Trial 2:  92%|█████████▏| 921/1000 [30:25<02:41,  2.05s/it]


Trial 2, Episode 920, Average Reward: 6618.71


Trial 2:  93%|█████████▎| 931/1000 [30:45<02:12,  1.92s/it]


Trial 2, Episode 930, Average Reward: 6639.85


Trial 2:  94%|█████████▍| 941/1000 [31:06<02:01,  2.06s/it]


Trial 2, Episode 940, Average Reward: 6643.32


Trial 2:  95%|█████████▌| 951/1000 [31:27<01:41,  2.08s/it]


Trial 2, Episode 950, Average Reward: 6657.13


Trial 2:  96%|█████████▌| 961/1000 [31:50<01:30,  2.31s/it]


Trial 2, Episode 960, Average Reward: 6665.82


Trial 2:  97%|█████████▋| 971/1000 [32:11<01:01,  2.12s/it]


Trial 2, Episode 970, Average Reward: 6676.88


Trial 2:  98%|█████████▊| 981/1000 [32:32<00:39,  2.08s/it]


Trial 2, Episode 980, Average Reward: 6669.16


Trial 2:  99%|█████████▉| 991/1000 [32:52<00:17,  1.98s/it]


Trial 2, Episode 990, Average Reward: 6658.76


Trial 2: 100%|██████████| 1000/1000 [33:11<00:00,  1.99s/it]


Video saved as 'ppo_hopper_trial_2.mp4'

Starting Trial 3/5 with environment Hopper-v4


Trial 3:   0%|          | 1/1000 [00:02<35:24,  2.13s/it]


Trial 3, Episode 0, Average Reward: 1405.04


Trial 3:   1%|          | 11/1000 [00:23<37:31,  2.28s/it]


Trial 3, Episode 10, Average Reward: 3661.25


Trial 3:   2%|▏         | 21/1000 [00:46<33:02,  2.03s/it]


Trial 3, Episode 20, Average Reward: 4224.09


Trial 3:   3%|▎         | 31/1000 [01:08<35:33,  2.20s/it]


Trial 3, Episode 30, Average Reward: 4606.83


Trial 3:   4%|▍         | 41/1000 [01:29<33:15,  2.08s/it]


Trial 3, Episode 40, Average Reward: 4929.70


Trial 3:   5%|▌         | 51/1000 [01:51<32:29,  2.05s/it]


Trial 3, Episode 50, Average Reward: 5117.27


Trial 3:   6%|▌         | 61/1000 [02:12<35:03,  2.24s/it]


Trial 3, Episode 60, Average Reward: 5300.72


Trial 3:   7%|▋         | 71/1000 [02:36<35:37,  2.30s/it]


Trial 3, Episode 70, Average Reward: 5290.96


Trial 3:   8%|▊         | 81/1000 [03:00<37:42,  2.46s/it]


Trial 3, Episode 80, Average Reward: 5357.54


Trial 3:   9%|▉         | 91/1000 [03:23<35:10,  2.32s/it]


Trial 3, Episode 90, Average Reward: 5442.72


Trial 3:  10%|█         | 101/1000 [03:45<31:52,  2.13s/it]


Trial 3, Episode 100, Average Reward: 5560.54


Trial 3:  11%|█         | 111/1000 [04:08<33:18,  2.25s/it]


Trial 3, Episode 110, Average Reward: 5752.42


Trial 3:  12%|█▏        | 121/1000 [04:31<33:04,  2.26s/it]


Trial 3, Episode 120, Average Reward: 5897.02


Trial 3:  13%|█▎        | 131/1000 [04:51<27:46,  1.92s/it]


Trial 3, Episode 130, Average Reward: 5987.33


Trial 3:  14%|█▍        | 141/1000 [05:09<25:56,  1.81s/it]


Trial 3, Episode 140, Average Reward: 6017.69


Trial 3:  15%|█▌        | 151/1000 [05:30<27:36,  1.95s/it]


Trial 3, Episode 150, Average Reward: 6071.54


Trial 3:  16%|█▌        | 161/1000 [05:50<30:37,  2.19s/it]


Trial 3, Episode 160, Average Reward: 6073.81


Trial 3:  17%|█▋        | 171/1000 [06:11<28:21,  2.05s/it]


Trial 3, Episode 170, Average Reward: 6185.99


Trial 3:  18%|█▊        | 181/1000 [06:33<28:23,  2.08s/it]


Trial 3, Episode 180, Average Reward: 6237.33


Trial 3:  19%|█▉        | 191/1000 [06:54<27:47,  2.06s/it]


Trial 3, Episode 190, Average Reward: 6286.11


Trial 3:  20%|██        | 201/1000 [07:14<26:38,  2.00s/it]


Trial 3, Episode 200, Average Reward: 6300.97


Trial 3:  21%|██        | 211/1000 [07:37<29:38,  2.25s/it]


Trial 3, Episode 210, Average Reward: 6369.15


Trial 3:  22%|██▏       | 221/1000 [07:59<28:55,  2.23s/it]


Trial 3, Episode 220, Average Reward: 6395.09


Trial 3:  23%|██▎       | 231/1000 [08:21<27:29,  2.15s/it]


Trial 3, Episode 230, Average Reward: 6410.16


Trial 3:  24%|██▍       | 241/1000 [08:42<27:16,  2.16s/it]


Trial 3, Episode 240, Average Reward: 6452.50


Trial 3:  25%|██▌       | 251/1000 [09:05<27:55,  2.24s/it]


Trial 3, Episode 250, Average Reward: 6456.40


Trial 3:  26%|██▌       | 261/1000 [09:26<24:50,  2.02s/it]


Trial 3, Episode 260, Average Reward: 6472.08


Trial 3:  27%|██▋       | 271/1000 [09:44<22:39,  1.86s/it]


Trial 3, Episode 270, Average Reward: 6481.25


Trial 3:  28%|██▊       | 281/1000 [10:03<22:44,  1.90s/it]


Trial 3, Episode 280, Average Reward: 6481.46


Trial 3:  29%|██▉       | 291/1000 [10:23<22:46,  1.93s/it]


Trial 3, Episode 290, Average Reward: 6470.14


Trial 3:  30%|███       | 301/1000 [10:41<21:38,  1.86s/it]


Trial 3, Episode 300, Average Reward: 6484.29


Trial 3:  31%|███       | 311/1000 [11:00<21:15,  1.85s/it]


Trial 3, Episode 310, Average Reward: 6471.57


Trial 3:  32%|███▏      | 321/1000 [11:18<21:13,  1.88s/it]


Trial 3, Episode 320, Average Reward: 6486.79


Trial 3:  33%|███▎      | 331/1000 [11:37<20:42,  1.86s/it]


Trial 3, Episode 330, Average Reward: 6521.85


Trial 3:  34%|███▍      | 341/1000 [11:55<20:16,  1.85s/it]


Trial 3, Episode 340, Average Reward: 6535.76


Trial 3:  35%|███▌      | 351/1000 [12:16<23:03,  2.13s/it]


Trial 3, Episode 350, Average Reward: 6561.01


Trial 3:  36%|███▌      | 361/1000 [12:38<23:26,  2.20s/it]


Trial 3, Episode 360, Average Reward: 6601.43


Trial 3:  37%|███▋      | 371/1000 [13:00<22:42,  2.17s/it]


Trial 3, Episode 370, Average Reward: 6620.74


Trial 3:  38%|███▊      | 381/1000 [13:20<19:18,  1.87s/it]


Trial 3, Episode 380, Average Reward: 6661.92


Trial 3:  39%|███▉      | 391/1000 [13:38<18:21,  1.81s/it]


Trial 3, Episode 390, Average Reward: 6685.70


Trial 3:  40%|████      | 401/1000 [13:56<17:51,  1.79s/it]


Trial 3, Episode 400, Average Reward: 6716.81


Trial 3:  41%|████      | 411/1000 [14:14<17:28,  1.78s/it]


Trial 3, Episode 410, Average Reward: 6762.27


Trial 3:  42%|████▏     | 421/1000 [14:32<17:23,  1.80s/it]


Trial 3, Episode 420, Average Reward: 6775.31


Trial 3:  43%|████▎     | 431/1000 [14:51<18:56,  2.00s/it]


Trial 3, Episode 430, Average Reward: 6770.40


Trial 3:  44%|████▍     | 441/1000 [15:13<20:11,  2.17s/it]


Trial 3, Episode 440, Average Reward: 6778.09


Trial 3:  45%|████▌     | 451/1000 [15:33<18:54,  2.07s/it]


Trial 3, Episode 450, Average Reward: 6791.04


Trial 3:  46%|████▌     | 461/1000 [15:53<18:13,  2.03s/it]


Trial 3, Episode 460, Average Reward: 6790.33


Trial 3:  47%|████▋     | 471/1000 [16:13<18:30,  2.10s/it]


Trial 3, Episode 470, Average Reward: 6819.67


Trial 3:  48%|████▊     | 481/1000 [16:35<18:12,  2.10s/it]


Trial 3, Episode 480, Average Reward: 6828.82


Trial 3:  49%|████▉     | 491/1000 [16:56<18:18,  2.16s/it]


Trial 3, Episode 490, Average Reward: 6847.48


Trial 3:  50%|█████     | 501/1000 [17:18<18:34,  2.23s/it]


Trial 3, Episode 500, Average Reward: 6852.53


Trial 3:  51%|█████     | 511/1000 [17:40<17:39,  2.17s/it]


Trial 3, Episode 510, Average Reward: 6851.91


Trial 3:  52%|█████▏    | 521/1000 [18:01<17:26,  2.19s/it]


Trial 3, Episode 520, Average Reward: 6856.09


Trial 3:  53%|█████▎    | 531/1000 [18:24<17:41,  2.26s/it]


Trial 3, Episode 530, Average Reward: 6864.14


Trial 3:  54%|█████▍    | 541/1000 [18:44<15:47,  2.06s/it]


Trial 3, Episode 540, Average Reward: 6860.63


Trial 3:  55%|█████▌    | 551/1000 [19:05<15:41,  2.10s/it]


Trial 3, Episode 550, Average Reward: 6866.88


Trial 3:  56%|█████▌    | 561/1000 [19:26<15:51,  2.17s/it]


Trial 3, Episode 560, Average Reward: 6882.63


Trial 3:  57%|█████▋    | 571/1000 [19:48<14:39,  2.05s/it]


Trial 3, Episode 570, Average Reward: 6888.07


Trial 3:  58%|█████▊    | 581/1000 [20:09<15:00,  2.15s/it]


Trial 3, Episode 580, Average Reward: 6898.98


Trial 3:  59%|█████▉    | 591/1000 [20:29<13:09,  1.93s/it]


Trial 3, Episode 590, Average Reward: 6900.78


Trial 3:  60%|██████    | 601/1000 [20:51<15:59,  2.40s/it]


Trial 3, Episode 600, Average Reward: 6907.38


Trial 3:  61%|██████    | 611/1000 [21:15<15:31,  2.39s/it]


Trial 3, Episode 610, Average Reward: 6916.84


Trial 3:  62%|██████▏   | 621/1000 [21:36<13:13,  2.09s/it]


Trial 3, Episode 620, Average Reward: 6930.55


Trial 3:  63%|██████▎   | 631/1000 [21:58<14:07,  2.30s/it]


Trial 3, Episode 630, Average Reward: 6945.97


Trial 3:  64%|██████▍   | 641/1000 [22:19<12:18,  2.06s/it]


Trial 3, Episode 640, Average Reward: 6962.83


Trial 3:  65%|██████▌   | 651/1000 [22:39<12:22,  2.13s/it]


Trial 3, Episode 650, Average Reward: 6956.31


Trial 3:  66%|██████▌   | 661/1000 [23:00<11:34,  2.05s/it]


Trial 3, Episode 660, Average Reward: 6947.93


Trial 3:  67%|██████▋   | 671/1000 [23:19<10:10,  1.86s/it]


Trial 3, Episode 670, Average Reward: 6950.01


Trial 3:  68%|██████▊   | 681/1000 [23:37<09:51,  1.85s/it]


Trial 3, Episode 680, Average Reward: 6962.50


Trial 3:  69%|██████▉   | 691/1000 [23:56<09:31,  1.85s/it]


Trial 3, Episode 690, Average Reward: 6971.28


Trial 3:  70%|███████   | 701/1000 [24:14<09:13,  1.85s/it]


Trial 3, Episode 700, Average Reward: 6981.66


Trial 3:  71%|███████   | 711/1000 [24:33<08:54,  1.85s/it]


Trial 3, Episode 710, Average Reward: 6992.27


Trial 3:  72%|███████▏  | 721/1000 [24:52<08:34,  1.84s/it]


Trial 3, Episode 720, Average Reward: 6992.20


Trial 3:  73%|███████▎  | 731/1000 [25:12<09:39,  2.15s/it]


Trial 3, Episode 730, Average Reward: 6990.71


Trial 3:  74%|███████▍  | 741/1000 [25:31<08:05,  1.88s/it]


Trial 3, Episode 740, Average Reward: 6987.56


Trial 3:  75%|███████▌  | 751/1000 [25:51<08:16,  1.99s/it]


Trial 3, Episode 750, Average Reward: 7008.48


Trial 3:  76%|███████▌  | 761/1000 [26:11<07:30,  1.88s/it]


Trial 3, Episode 760, Average Reward: 7029.50


Trial 3:  77%|███████▋  | 771/1000 [26:29<07:12,  1.89s/it]


Trial 3, Episode 770, Average Reward: 7032.19


Trial 3:  78%|███████▊  | 781/1000 [26:48<06:48,  1.86s/it]


Trial 3, Episode 780, Average Reward: 7037.30


Trial 3:  79%|███████▉  | 791/1000 [27:07<06:48,  1.95s/it]


Trial 3, Episode 790, Average Reward: 7049.07


Trial 3:  80%|████████  | 801/1000 [27:26<06:05,  1.84s/it]


Trial 3, Episode 800, Average Reward: 7054.26


Trial 3:  81%|████████  | 811/1000 [27:45<06:04,  1.93s/it]


Trial 3, Episode 810, Average Reward: 7069.45


Trial 3:  82%|████████▏ | 821/1000 [28:04<05:22,  1.80s/it]


Trial 3, Episode 820, Average Reward: 7078.64


Trial 3:  83%|████████▎ | 831/1000 [28:21<05:02,  1.79s/it]


Trial 3, Episode 830, Average Reward: 7086.52


Trial 3:  84%|████████▍ | 841/1000 [28:39<04:43,  1.78s/it]


Trial 3, Episode 840, Average Reward: 7092.49


Trial 3:  85%|████████▌ | 851/1000 [28:58<04:28,  1.80s/it]


Trial 3, Episode 850, Average Reward: 7068.69


Trial 3:  86%|████████▌ | 861/1000 [29:16<04:13,  1.82s/it]


Trial 3, Episode 860, Average Reward: 7057.68


Trial 3:  87%|████████▋ | 871/1000 [29:34<03:55,  1.83s/it]


Trial 3, Episode 870, Average Reward: 7055.93


Trial 3:  88%|████████▊ | 881/1000 [29:52<03:33,  1.79s/it]


Trial 3, Episode 880, Average Reward: 7041.68


Trial 3:  89%|████████▉ | 891/1000 [30:10<03:15,  1.80s/it]


Trial 3, Episode 890, Average Reward: 7022.12


Trial 3:  90%|█████████ | 901/1000 [30:28<02:57,  1.79s/it]


Trial 3, Episode 900, Average Reward: 7013.66


Trial 3:  91%|█████████ | 911/1000 [30:46<02:45,  1.86s/it]


Trial 3, Episode 910, Average Reward: 6996.07


Trial 3:  92%|█████████▏| 921/1000 [31:04<02:21,  1.79s/it]


Trial 3, Episode 920, Average Reward: 6988.83


Trial 3:  93%|█████████▎| 931/1000 [31:22<02:02,  1.78s/it]


Trial 3, Episode 930, Average Reward: 6980.44


Trial 3:  94%|█████████▍| 941/1000 [31:40<01:50,  1.88s/it]


Trial 3, Episode 940, Average Reward: 6976.03


Trial 3:  95%|█████████▌| 951/1000 [31:59<01:27,  1.79s/it]


Trial 3, Episode 950, Average Reward: 6991.10


Trial 3:  96%|█████████▌| 961/1000 [32:17<01:14,  1.91s/it]


Trial 3, Episode 960, Average Reward: 6987.85


Trial 3:  97%|█████████▋| 971/1000 [32:36<00:52,  1.80s/it]


Trial 3, Episode 970, Average Reward: 6981.89


Trial 3:  98%|█████████▊| 981/1000 [32:54<00:34,  1.82s/it]


Trial 3, Episode 980, Average Reward: 6975.43


Trial 3:  99%|█████████▉| 991/1000 [33:12<00:16,  1.78s/it]


Trial 3, Episode 990, Average Reward: 6972.34


Trial 3: 100%|██████████| 1000/1000 [33:28<00:00,  2.01s/it]


Video saved as 'ppo_hopper_trial_3.mp4'

Starting Trial 4/5 with environment Hopper-v4


Trial 4:   0%|          | 1/1000 [00:01<31:16,  1.88s/it]


Trial 4, Episode 0, Average Reward: 1223.01


Trial 4:   1%|          | 11/1000 [00:20<30:18,  1.84s/it]


Trial 4, Episode 10, Average Reward: 3265.70


Trial 4:   2%|▏         | 21/1000 [00:38<29:21,  1.80s/it]


Trial 4, Episode 20, Average Reward: 3926.34


Trial 4:   3%|▎         | 31/1000 [00:56<29:01,  1.80s/it]


Trial 4, Episode 30, Average Reward: 4292.62


Trial 4:   4%|▍         | 41/1000 [01:14<30:02,  1.88s/it]


Trial 4, Episode 40, Average Reward: 4625.76


Trial 4:   5%|▌         | 51/1000 [01:32<28:22,  1.79s/it]


Trial 4, Episode 50, Average Reward: 4857.30


Trial 4:   6%|▌         | 61/1000 [01:50<28:15,  1.81s/it]


Trial 4, Episode 60, Average Reward: 5033.31


Trial 4:   7%|▋         | 71/1000 [02:08<27:49,  1.80s/it]


Trial 4, Episode 70, Average Reward: 5194.42


Trial 4:   8%|▊         | 81/1000 [02:27<27:19,  1.78s/it]


Trial 4, Episode 80, Average Reward: 5320.06


Trial 4:   9%|▉         | 91/1000 [02:45<27:14,  1.80s/it]


Trial 4, Episode 90, Average Reward: 5406.93


Trial 4:  10%|█         | 101/1000 [03:02<26:37,  1.78s/it]


Trial 4, Episode 100, Average Reward: 5542.53


Trial 4:  11%|█         | 111/1000 [03:21<26:48,  1.81s/it]


Trial 4, Episode 110, Average Reward: 5836.16


Trial 4:  12%|█▏        | 121/1000 [03:39<27:07,  1.85s/it]


Trial 4, Episode 120, Average Reward: 6023.62


Trial 4:  13%|█▎        | 131/1000 [03:57<26:08,  1.80s/it]


Trial 4, Episode 130, Average Reward: 6153.87


Trial 4:  14%|█▍        | 141/1000 [04:16<26:59,  1.88s/it]


Trial 4, Episode 140, Average Reward: 6232.22


Trial 4:  15%|█▌        | 151/1000 [04:33<25:10,  1.78s/it]


Trial 4, Episode 150, Average Reward: 6306.12


Trial 4:  16%|█▌        | 161/1000 [04:51<25:21,  1.81s/it]


Trial 4, Episode 160, Average Reward: 6369.61


Trial 4:  17%|█▋        | 171/1000 [05:09<24:48,  1.80s/it]


Trial 4, Episode 170, Average Reward: 6411.22


Trial 4:  18%|█▊        | 181/1000 [05:28<24:39,  1.81s/it]


Trial 4, Episode 180, Average Reward: 6452.56


Trial 4:  19%|█▉        | 191/1000 [05:46<24:10,  1.79s/it]


Trial 4, Episode 190, Average Reward: 6497.81


Trial 4:  20%|██        | 201/1000 [06:06<29:28,  2.21s/it]


Trial 4, Episode 200, Average Reward: 6511.33


Trial 4:  21%|██        | 211/1000 [06:26<25:35,  1.95s/it]


Trial 4, Episode 210, Average Reward: 6527.66


Trial 4:  22%|██▏       | 221/1000 [06:46<25:38,  1.97s/it]


Trial 4, Episode 220, Average Reward: 6550.98


Trial 4:  23%|██▎       | 231/1000 [07:05<23:25,  1.83s/it]


Trial 4, Episode 230, Average Reward: 6579.22


Trial 4:  24%|██▍       | 241/1000 [07:24<23:49,  1.88s/it]


Trial 4, Episode 240, Average Reward: 6611.42


Trial 4:  25%|██▌       | 251/1000 [07:42<22:30,  1.80s/it]


Trial 4, Episode 250, Average Reward: 6641.14


Trial 4:  26%|██▌       | 261/1000 [08:00<22:35,  1.83s/it]


Trial 4, Episode 260, Average Reward: 6654.87


Trial 4:  27%|██▋       | 271/1000 [08:18<22:25,  1.85s/it]


Trial 4, Episode 270, Average Reward: 6675.65


Trial 4:  28%|██▊       | 281/1000 [08:37<21:49,  1.82s/it]


Trial 4, Episode 280, Average Reward: 6691.13


Trial 4:  29%|██▉       | 291/1000 [08:55<21:33,  1.82s/it]


Trial 4, Episode 290, Average Reward: 6716.71


Trial 4:  30%|███       | 301/1000 [09:14<22:49,  1.96s/it]


Trial 4, Episode 300, Average Reward: 6750.72


Trial 4:  31%|███       | 311/1000 [09:32<20:51,  1.82s/it]


Trial 4, Episode 310, Average Reward: 6772.77


Trial 4:  32%|███▏      | 321/1000 [09:50<20:25,  1.81s/it]


Trial 4, Episode 320, Average Reward: 6785.60


Trial 4:  33%|███▎      | 331/1000 [10:08<20:03,  1.80s/it]


Trial 4, Episode 330, Average Reward: 6827.98


Trial 4:  34%|███▍      | 341/1000 [10:26<19:52,  1.81s/it]


Trial 4, Episode 340, Average Reward: 6846.06


Trial 4:  35%|███▌      | 351/1000 [10:44<19:38,  1.82s/it]


Trial 4, Episode 350, Average Reward: 6853.95


Trial 4:  36%|███▌      | 361/1000 [11:03<19:35,  1.84s/it]


Trial 4, Episode 360, Average Reward: 6884.08


Trial 4:  37%|███▋      | 371/1000 [11:21<19:20,  1.84s/it]


Trial 4, Episode 370, Average Reward: 6907.01


Trial 4:  38%|███▊      | 381/1000 [11:39<18:39,  1.81s/it]


Trial 4, Episode 380, Average Reward: 6933.13


Trial 4:  39%|███▉      | 391/1000 [11:57<18:18,  1.80s/it]


Trial 4, Episode 390, Average Reward: 6942.37


Trial 4:  40%|████      | 401/1000 [12:16<18:31,  1.86s/it]


Trial 4, Episode 400, Average Reward: 6966.56


Trial 4:  41%|████      | 411/1000 [12:34<17:37,  1.80s/it]


Trial 4, Episode 410, Average Reward: 6991.51


Trial 4:  42%|████▏     | 421/1000 [12:56<23:10,  2.40s/it]


Trial 4, Episode 420, Average Reward: 7021.22


Trial 4:  43%|████▎     | 431/1000 [13:15<18:20,  1.93s/it]


Trial 4, Episode 430, Average Reward: 7028.66


Trial 4:  44%|████▍     | 441/1000 [13:34<16:59,  1.82s/it]


Trial 4, Episode 440, Average Reward: 7046.72


Trial 4:  45%|████▌     | 451/1000 [13:53<16:56,  1.85s/it]


Trial 4, Episode 450, Average Reward: 7050.80


Trial 4:  46%|████▌     | 461/1000 [14:11<17:38,  1.96s/it]


Trial 4, Episode 460, Average Reward: 7052.94


Trial 4:  47%|████▋     | 471/1000 [14:30<16:07,  1.83s/it]


Trial 4, Episode 470, Average Reward: 7058.51


Trial 4:  48%|████▊     | 481/1000 [14:48<15:54,  1.84s/it]


Trial 4, Episode 480, Average Reward: 7048.85


Trial 4:  49%|████▉     | 491/1000 [15:08<18:04,  2.13s/it]


Trial 4, Episode 490, Average Reward: 7055.12


Trial 4:  50%|█████     | 501/1000 [15:29<16:01,  1.93s/it]


Trial 4, Episode 500, Average Reward: 7043.19


Trial 4:  51%|█████     | 511/1000 [15:50<16:39,  2.04s/it]


Trial 4, Episode 510, Average Reward: 7051.13


Trial 4:  52%|█████▏    | 521/1000 [16:11<18:19,  2.30s/it]


Trial 4, Episode 520, Average Reward: 7049.00


Trial 4:  53%|█████▎    | 531/1000 [16:32<16:10,  2.07s/it]


Trial 4, Episode 530, Average Reward: 7057.71


Trial 4:  54%|█████▍    | 541/1000 [16:52<15:14,  1.99s/it]


Trial 4, Episode 540, Average Reward: 7079.99


Trial 4:  55%|█████▌    | 551/1000 [17:12<15:44,  2.10s/it]


Trial 4, Episode 550, Average Reward: 7098.69


Trial 4:  56%|█████▌    | 561/1000 [17:31<13:46,  1.88s/it]


Trial 4, Episode 560, Average Reward: 7102.08


Trial 4:  57%|█████▋    | 571/1000 [17:50<13:24,  1.87s/it]


Trial 4, Episode 570, Average Reward: 7108.15


Trial 4:  58%|█████▊    | 581/1000 [18:09<13:09,  1.89s/it]


Trial 4, Episode 580, Average Reward: 7138.09


Trial 4:  59%|█████▉    | 591/1000 [18:28<12:52,  1.89s/it]


Trial 4, Episode 590, Average Reward: 7153.78


Trial 4:  60%|██████    | 601/1000 [18:48<13:55,  2.09s/it]


Trial 4, Episode 600, Average Reward: 7180.63


Trial 4:  61%|██████    | 611/1000 [19:08<12:16,  1.89s/it]


Trial 4, Episode 610, Average Reward: 7169.98


Trial 4:  62%|██████▏   | 621/1000 [19:27<12:07,  1.92s/it]


Trial 4, Episode 620, Average Reward: 7143.32


Trial 4:  63%|██████▎   | 631/1000 [19:45<11:28,  1.87s/it]


Trial 4, Episode 630, Average Reward: 7124.31


Trial 4:  64%|██████▍   | 641/1000 [20:04<11:11,  1.87s/it]


Trial 4, Episode 640, Average Reward: 7100.98


Trial 4:  65%|██████▌   | 651/1000 [20:23<11:00,  1.89s/it]


Trial 4, Episode 650, Average Reward: 7109.68


Trial 4:  66%|██████▌   | 661/1000 [20:42<10:38,  1.88s/it]


Trial 4, Episode 660, Average Reward: 7130.89


Trial 4:  67%|██████▋   | 671/1000 [21:01<10:14,  1.87s/it]


Trial 4, Episode 670, Average Reward: 7138.19


Trial 4:  68%|██████▊   | 681/1000 [21:20<10:12,  1.92s/it]


Trial 4, Episode 680, Average Reward: 7141.10


Trial 4:  69%|██████▉   | 691/1000 [21:39<09:37,  1.87s/it]


Trial 4, Episode 690, Average Reward: 7148.56


Trial 4:  70%|███████   | 701/1000 [21:58<09:20,  1.87s/it]


Trial 4, Episode 700, Average Reward: 7143.48


Trial 4:  71%|███████   | 711/1000 [22:18<09:26,  1.96s/it]


Trial 4, Episode 710, Average Reward: 7154.22


Trial 4:  72%|███████▏  | 721/1000 [22:36<08:41,  1.87s/it]


Trial 4, Episode 720, Average Reward: 7182.03


Trial 4:  73%|███████▎  | 731/1000 [22:55<08:23,  1.87s/it]


Trial 4, Episode 730, Average Reward: 7186.22


Trial 4:  74%|███████▍  | 741/1000 [23:14<08:26,  1.95s/it]


Trial 4, Episode 740, Average Reward: 7173.56


Trial 4:  75%|███████▌  | 751/1000 [23:33<07:44,  1.87s/it]


Trial 4, Episode 750, Average Reward: 7155.76


Trial 4:  76%|███████▌  | 761/1000 [23:52<07:33,  1.90s/it]


Trial 4, Episode 760, Average Reward: 7141.85


Trial 4:  77%|███████▋  | 771/1000 [24:11<07:33,  1.98s/it]


Trial 4, Episode 770, Average Reward: 7134.72


Trial 4:  78%|███████▊  | 781/1000 [24:30<06:52,  1.89s/it]


Trial 4, Episode 780, Average Reward: 7111.65


Trial 4:  79%|███████▉  | 791/1000 [24:49<06:33,  1.88s/it]


Trial 4, Episode 790, Average Reward: 7081.29


Trial 4:  80%|████████  | 801/1000 [25:08<06:10,  1.86s/it]


Trial 4, Episode 800, Average Reward: 7065.04


Trial 4:  81%|████████  | 811/1000 [25:27<05:57,  1.89s/it]


Trial 4, Episode 810, Average Reward: 7051.31


Trial 4:  82%|████████▏ | 821/1000 [25:46<05:36,  1.88s/it]


Trial 4, Episode 820, Average Reward: 7038.82


Trial 4:  83%|████████▎ | 831/1000 [26:05<05:14,  1.86s/it]


Trial 4, Episode 830, Average Reward: 7032.94


Trial 4:  84%|████████▍ | 841/1000 [26:24<05:01,  1.90s/it]


Trial 4, Episode 840, Average Reward: 7060.84


Trial 4:  85%|████████▌ | 851/1000 [26:43<04:38,  1.87s/it]


Trial 4, Episode 850, Average Reward: 7098.37


Trial 4:  86%|████████▌ | 861/1000 [27:01<04:18,  1.86s/it]


Trial 4, Episode 860, Average Reward: 7122.04


Trial 4:  87%|████████▋ | 871/1000 [27:21<04:06,  1.91s/it]


Trial 4, Episode 870, Average Reward: 7150.45


Trial 4:  88%|████████▊ | 881/1000 [27:40<03:43,  1.88s/it]


Trial 4, Episode 880, Average Reward: 7184.48


Trial 4:  89%|████████▉ | 891/1000 [27:59<03:35,  1.97s/it]


Trial 4, Episode 890, Average Reward: 7236.96


Trial 4:  90%|█████████ | 901/1000 [28:18<03:10,  1.93s/it]


Trial 4, Episode 900, Average Reward: 7283.65


Trial 4:  91%|█████████ | 911/1000 [28:37<02:46,  1.88s/it]


Trial 4, Episode 910, Average Reward: 7312.66


Trial 4:  92%|█████████▏| 921/1000 [28:56<02:28,  1.88s/it]


Trial 4, Episode 920, Average Reward: 7348.70


Trial 4:  93%|█████████▎| 931/1000 [29:15<02:16,  1.97s/it]


Trial 4, Episode 930, Average Reward: 7372.55


Trial 4:  94%|█████████▍| 941/1000 [29:34<01:53,  1.92s/it]


Trial 4, Episode 940, Average Reward: 7381.89


Trial 4:  95%|█████████▌| 951/1000 [29:53<01:31,  1.88s/it]


Trial 4, Episode 950, Average Reward: 7370.27


Trial 4:  96%|█████████▌| 961/1000 [30:12<01:19,  2.05s/it]


Trial 4, Episode 960, Average Reward: 7354.68


Trial 4:  97%|█████████▋| 971/1000 [30:31<00:54,  1.87s/it]


Trial 4, Episode 970, Average Reward: 7333.85


Trial 4:  98%|█████████▊| 981/1000 [30:50<00:35,  1.87s/it]


Trial 4, Episode 980, Average Reward: 7329.55


Trial 4:  99%|█████████▉| 991/1000 [31:08<00:16,  1.86s/it]


Trial 4, Episode 990, Average Reward: 7303.09


Trial 4: 100%|██████████| 1000/1000 [31:26<00:00,  1.89s/it]


Video saved as 'ppo_hopper_trial_4.mp4'

Starting Trial 5/5 with environment Hopper-v4


Trial 5:   0%|          | 1/1000 [00:01<32:16,  1.94s/it]


Trial 5, Episode 0, Average Reward: 1273.58


Trial 5:   1%|          | 11/1000 [00:20<30:58,  1.88s/it]


Trial 5, Episode 10, Average Reward: 3359.51


Trial 5:   2%|▏         | 21/1000 [00:39<30:32,  1.87s/it]


Trial 5, Episode 20, Average Reward: 3945.15


Trial 5:   3%|▎         | 31/1000 [00:59<30:39,  1.90s/it]


Trial 5, Episode 30, Average Reward: 4243.47


Trial 5:   4%|▍         | 41/1000 [01:17<30:00,  1.88s/it]


Trial 5, Episode 40, Average Reward: 4434.79


Trial 5:   5%|▌         | 51/1000 [01:36<29:58,  1.89s/it]


Trial 5, Episode 50, Average Reward: 4661.50


Trial 5:   6%|▌         | 61/1000 [01:56<29:44,  1.90s/it]


Trial 5, Episode 60, Average Reward: 4853.34


Trial 5:   7%|▋         | 71/1000 [02:14<29:02,  1.88s/it]


Trial 5, Episode 70, Average Reward: 4992.96


Trial 5:   8%|▊         | 81/1000 [02:33<28:41,  1.87s/it]


Trial 5, Episode 80, Average Reward: 5057.20


Trial 5:   9%|▉         | 91/1000 [02:52<28:31,  1.88s/it]


Trial 5, Episode 90, Average Reward: 5191.43


Trial 5:  10%|█         | 101/1000 [03:11<28:06,  1.88s/it]


Trial 5, Episode 100, Average Reward: 5334.72


Trial 5:  11%|█         | 111/1000 [03:30<27:43,  1.87s/it]


Trial 5, Episode 110, Average Reward: 5618.85


Trial 5:  12%|█▏        | 121/1000 [03:49<27:49,  1.90s/it]


Trial 5, Episode 120, Average Reward: 5801.28


Trial 5:  13%|█▎        | 131/1000 [04:07<27:10,  1.88s/it]


Trial 5, Episode 130, Average Reward: 5938.55


Trial 5:  14%|█▍        | 141/1000 [04:26<26:49,  1.87s/it]


Trial 5, Episode 140, Average Reward: 6067.66


Trial 5:  15%|█▌        | 151/1000 [04:46<27:55,  1.97s/it]


Trial 5, Episode 150, Average Reward: 6170.81


Trial 5:  16%|█▌        | 161/1000 [05:04<26:10,  1.87s/it]


Trial 5, Episode 160, Average Reward: 6249.16


Trial 5:  17%|█▋        | 171/1000 [05:23<25:59,  1.88s/it]


Trial 5, Episode 170, Average Reward: 6301.97


Trial 5:  18%|█▊        | 181/1000 [05:42<27:45,  2.03s/it]


Trial 5, Episode 180, Average Reward: 6412.19


Trial 5:  19%|█▉        | 191/1000 [06:01<25:22,  1.88s/it]


Trial 5, Episode 190, Average Reward: 6440.17


Trial 5:  20%|██        | 201/1000 [06:20<25:01,  1.88s/it]


Trial 5, Episode 200, Average Reward: 6465.98


Trial 5:  21%|██        | 211/1000 [06:39<24:33,  1.87s/it]


Trial 5, Episode 210, Average Reward: 6484.43


Trial 5:  22%|██▏       | 221/1000 [06:58<24:20,  1.87s/it]


Trial 5, Episode 220, Average Reward: 6495.40


Trial 5:  23%|██▎       | 231/1000 [07:17<24:36,  1.92s/it]


Trial 5, Episode 230, Average Reward: 6496.43


Trial 5:  24%|██▍       | 241/1000 [07:36<23:41,  1.87s/it]


Trial 5, Episode 240, Average Reward: 6468.05


Trial 5:  25%|██▌       | 251/1000 [07:55<23:38,  1.89s/it]


Trial 5, Episode 250, Average Reward: 6437.65


Trial 5:  26%|██▌       | 261/1000 [08:14<23:12,  1.88s/it]


Trial 5, Episode 260, Average Reward: 6434.25


Trial 5:  27%|██▋       | 271/1000 [08:33<22:40,  1.87s/it]


Trial 5, Episode 270, Average Reward: 6455.74


Trial 5:  28%|██▊       | 281/1000 [08:52<22:53,  1.91s/it]


Trial 5, Episode 280, Average Reward: 6447.72


Trial 5:  29%|██▉       | 291/1000 [09:11<22:01,  1.86s/it]


Trial 5, Episode 290, Average Reward: 6449.65


Trial 5:  30%|███       | 301/1000 [09:30<21:47,  1.87s/it]


Trial 5, Episode 300, Average Reward: 6448.49


Trial 5:  31%|███       | 311/1000 [09:49<22:06,  1.92s/it]


Trial 5, Episode 310, Average Reward: 6425.99


Trial 5:  32%|███▏      | 321/1000 [10:08<21:09,  1.87s/it]


Trial 5, Episode 320, Average Reward: 6433.59


Trial 5:  33%|███▎      | 331/1000 [10:26<20:48,  1.87s/it]


Trial 5, Episode 330, Average Reward: 6443.98


Trial 5:  34%|███▍      | 341/1000 [10:46<21:41,  1.97s/it]


Trial 5, Episode 340, Average Reward: 6496.71


Trial 5:  35%|███▌      | 351/1000 [11:04<20:42,  1.91s/it]


Trial 5, Episode 350, Average Reward: 6520.84


Trial 5:  36%|███▌      | 361/1000 [11:23<19:52,  1.87s/it]


Trial 5, Episode 360, Average Reward: 6497.53


Trial 5:  37%|███▋      | 371/1000 [11:43<21:58,  2.10s/it]


Trial 5, Episode 370, Average Reward: 6488.40


Trial 5:  38%|███▊      | 381/1000 [12:02<19:23,  1.88s/it]


Trial 5, Episode 380, Average Reward: 6489.35


Trial 5:  39%|███▉      | 391/1000 [12:21<19:05,  1.88s/it]


Trial 5, Episode 390, Average Reward: 6486.72


Trial 5:  40%|████      | 401/1000 [12:40<18:49,  1.89s/it]


Trial 5, Episode 400, Average Reward: 6478.90


Trial 5:  41%|████      | 411/1000 [12:59<18:36,  1.90s/it]


Trial 5, Episode 410, Average Reward: 6501.61


Trial 5:  42%|████▏     | 421/1000 [13:18<18:07,  1.88s/it]


Trial 5, Episode 420, Average Reward: 6477.69


Trial 5:  43%|████▎     | 431/1000 [13:37<17:56,  1.89s/it]


Trial 5, Episode 430, Average Reward: 6487.07


Trial 5:  44%|████▍     | 441/1000 [13:56<17:39,  1.90s/it]


Trial 5, Episode 440, Average Reward: 6504.18


Trial 5:  45%|████▌     | 451/1000 [14:15<17:15,  1.89s/it]


Trial 5, Episode 450, Average Reward: 6488.90


Trial 5:  46%|████▌     | 461/1000 [14:34<16:54,  1.88s/it]


Trial 5, Episode 460, Average Reward: 6501.90


Trial 5:  47%|████▋     | 471/1000 [14:53<16:53,  1.91s/it]


Trial 5, Episode 470, Average Reward: 6528.38


Trial 5:  48%|████▊     | 481/1000 [15:12<16:16,  1.88s/it]


Trial 5, Episode 480, Average Reward: 6547.57


Trial 5:  49%|████▉     | 491/1000 [15:31<15:55,  1.88s/it]


Trial 5, Episode 490, Average Reward: 6565.97


Trial 5:  50%|█████     | 501/1000 [15:50<15:52,  1.91s/it]


Trial 5, Episode 500, Average Reward: 6602.67


Trial 5:  51%|█████     | 511/1000 [16:09<15:22,  1.89s/it]


Trial 5, Episode 510, Average Reward: 6616.94


Trial 5:  52%|█████▏    | 521/1000 [16:28<14:58,  1.88s/it]


Trial 5, Episode 520, Average Reward: 6661.29


Trial 5:  53%|█████▎    | 531/1000 [16:47<15:16,  1.95s/it]


Trial 5, Episode 530, Average Reward: 6704.60


Trial 5:  54%|█████▍    | 541/1000 [17:06<14:24,  1.88s/it]


Trial 5, Episode 540, Average Reward: 6716.72


Trial 5:  55%|█████▌    | 551/1000 [17:25<14:14,  1.90s/it]


Trial 5, Episode 550, Average Reward: 6761.50


Trial 5:  56%|█████▌    | 561/1000 [17:44<14:55,  2.04s/it]


Trial 5, Episode 560, Average Reward: 6798.59


Trial 5:  57%|█████▋    | 571/1000 [18:03<13:24,  1.88s/it]


Trial 5, Episode 570, Average Reward: 6802.90


Trial 5:  58%|█████▊    | 581/1000 [18:22<13:08,  1.88s/it]


Trial 5, Episode 580, Average Reward: 6816.55


Trial 5:  59%|█████▉    | 591/1000 [18:41<12:55,  1.90s/it]


Trial 5, Episode 590, Average Reward: 6828.66


Trial 5:  60%|██████    | 601/1000 [19:00<12:24,  1.87s/it]


Trial 5, Episode 600, Average Reward: 6828.33


Trial 5:  61%|██████    | 611/1000 [19:19<12:09,  1.87s/it]


Trial 5, Episode 610, Average Reward: 6818.77


Trial 5:  62%|██████▏   | 621/1000 [19:38<11:51,  1.88s/it]


Trial 5, Episode 620, Average Reward: 6801.24


Trial 5:  63%|██████▎   | 631/1000 [19:57<11:38,  1.89s/it]


Trial 5, Episode 630, Average Reward: 6788.65


Trial 5:  64%|██████▍   | 641/1000 [20:16<11:25,  1.91s/it]


Trial 5, Episode 640, Average Reward: 6782.25


Trial 5:  65%|██████▌   | 651/1000 [20:35<10:53,  1.87s/it]


Trial 5, Episode 650, Average Reward: 6773.56


Trial 5:  66%|██████▌   | 661/1000 [20:54<10:42,  1.89s/it]


Trial 5, Episode 660, Average Reward: 6766.41


Trial 5:  67%|██████▋   | 671/1000 [21:12<10:19,  1.88s/it]


Trial 5, Episode 670, Average Reward: 6735.69


Trial 5:  68%|██████▊   | 681/1000 [21:31<09:59,  1.88s/it]


Trial 5, Episode 680, Average Reward: 6639.45


Trial 5:  69%|██████▉   | 691/1000 [21:50<09:40,  1.88s/it]


Trial 5, Episode 690, Average Reward: 6521.09


Trial 5:  70%|███████   | 701/1000 [22:09<09:17,  1.86s/it]


Trial 5, Episode 700, Average Reward: 6437.49


Trial 5:  71%|███████   | 711/1000 [22:27<09:03,  1.88s/it]


Trial 5, Episode 710, Average Reward: 6400.44


Trial 5:  72%|███████▏  | 721/1000 [22:46<08:43,  1.88s/it]


Trial 5, Episode 720, Average Reward: 6400.65


Trial 5:  73%|███████▎  | 731/1000 [23:05<08:23,  1.87s/it]


Trial 5, Episode 730, Average Reward: 6395.04


Trial 5:  74%|███████▍  | 741/1000 [23:24<08:04,  1.87s/it]


Trial 5, Episode 740, Average Reward: 6400.37


Trial 5:  75%|███████▌  | 751/1000 [23:42<07:44,  1.86s/it]


Trial 5, Episode 750, Average Reward: 6398.55


Trial 5:  76%|███████▌  | 761/1000 [24:01<07:25,  1.86s/it]


Trial 5, Episode 760, Average Reward: 6396.48


Trial 5:  77%|███████▋  | 771/1000 [24:20<07:09,  1.88s/it]


Trial 5, Episode 770, Average Reward: 6439.97


Trial 5:  78%|███████▊  | 781/1000 [24:38<06:50,  1.87s/it]


Trial 5, Episode 780, Average Reward: 6538.97


Trial 5:  79%|███████▉  | 791/1000 [24:57<06:33,  1.88s/it]


Trial 5, Episode 790, Average Reward: 6665.12


Trial 5:  80%|████████  | 801/1000 [25:16<06:07,  1.85s/it]


Trial 5, Episode 800, Average Reward: 6746.98


Trial 5:  81%|████████  | 811/1000 [25:34<05:47,  1.84s/it]


Trial 5, Episode 810, Average Reward: 6804.61


Trial 5:  82%|████████▏ | 821/1000 [25:53<05:28,  1.84s/it]


Trial 5, Episode 820, Average Reward: 6822.81


Trial 5:  83%|████████▎ | 831/1000 [26:11<05:14,  1.86s/it]


Trial 5, Episode 830, Average Reward: 6839.44


Trial 5:  84%|████████▍ | 841/1000 [26:29<04:51,  1.84s/it]


Trial 5, Episode 840, Average Reward: 6813.78


Trial 5:  85%|████████▌ | 851/1000 [26:48<04:45,  1.92s/it]


Trial 5, Episode 850, Average Reward: 6832.56


Trial 5:  86%|████████▌ | 861/1000 [27:08<04:30,  1.95s/it]


Trial 5, Episode 860, Average Reward: 6831.45


Trial 5:  87%|████████▋ | 871/1000 [27:28<04:16,  1.99s/it]


Trial 5, Episode 870, Average Reward: 6824.73


Trial 5:  88%|████████▊ | 881/1000 [27:47<03:41,  1.86s/it]


Trial 5, Episode 880, Average Reward: 6825.42


Trial 5:  89%|████████▉ | 891/1000 [28:06<03:34,  1.96s/it]


Trial 5, Episode 890, Average Reward: 6831.61


Trial 5:  90%|█████████ | 901/1000 [28:26<03:16,  1.99s/it]


Trial 5, Episode 900, Average Reward: 6857.65


Trial 5:  91%|█████████ | 911/1000 [28:45<02:46,  1.87s/it]


Trial 5, Episode 910, Average Reward: 6859.76


Trial 5:  92%|█████████▏| 921/1000 [29:04<02:25,  1.84s/it]


Trial 5, Episode 920, Average Reward: 6858.31


Trial 5:  93%|█████████▎| 931/1000 [29:22<02:06,  1.84s/it]


Trial 5, Episode 930, Average Reward: 6857.55


Trial 5:  94%|█████████▍| 941/1000 [29:41<01:48,  1.85s/it]


Trial 5, Episode 940, Average Reward: 6888.14


Trial 5:  95%|█████████▌| 951/1000 [29:59<01:30,  1.85s/it]


Trial 5, Episode 950, Average Reward: 6876.66


Trial 5:  96%|█████████▌| 961/1000 [30:21<01:27,  2.25s/it]


Trial 5, Episode 960, Average Reward: 6895.58


Trial 5:  97%|█████████▋| 971/1000 [30:40<00:52,  1.80s/it]


Trial 5, Episode 970, Average Reward: 6899.91


Trial 5:  98%|█████████▊| 981/1000 [30:58<00:34,  1.80s/it]


Trial 5, Episode 980, Average Reward: 6896.43


Trial 5:  99%|█████████▉| 991/1000 [31:17<00:16,  1.84s/it]


Trial 5, Episode 990, Average Reward: 6883.64


Trial 5: 100%|██████████| 1000/1000 [31:36<00:00,  1.90s/it]


Video saved as 'ppo_hopper_trial_5.mp4'

Training progress plot saved as 'ppo_training_progress.png'
