In [None]:
import tensorflow as tf
import numpy as np
import scipy.signal
from tensorflow.keras import layers, Model
import gym

# Create CartPole environment
env = gym.make('CartPole-v1')

# Hyperparameters
# NOTE(review): the train_ppo call at the bottom of this cell passes none of
# these, so train_ppo runs on its own keyword defaults. Of the names below, only
# steps_per_epoch (buffer size), clip_ratio (train_policy) and render
# (train_ppo) are read as globals by the code visible in this cell; gamma and
# lam are not forwarded to Buffer, and batch_size is not referenced at all.
epochs = 1000
steps_per_epoch = 2048
train_policy_iterations = 80
train_value_iterations = 80
target_kl = 0.01
clip_ratio = 0.2
gamma = 0.99
lam = 0.95
batch_size = 64
render = False

# Initialize buffers and optimizers
observation_dim = env.observation_space.shape[0]
num_actions = env.action_space.n
# NOTE(review): Buffer is defined further down in this cell, so on a fresh
# kernel this line raises NameError when the cell runs top to bottom. The class
# definition has to execute before this statement.
buffer = Buffer(observation_dim, size=steps_per_epoch)
policy_optimizer = tf.keras.optimizers.Adam(learning_rate=3e-4)
value_optimizer = tf.keras.optimizers.Adam(learning_rate=3e-4)

# Seed for reproducibility
# NOTE(review): this generator is not passed to any sampling call visible in
# this cell (sample_action uses tf.random.categorical without a seed), so it
# does not make the rollout reproducible by itself.
seed_generator = tf.random.Generator.from_seed(1337)

# Transformer Model Definitions
def build_transformer_actor(input_shape, num_actions, num_heads=2, num_layers=2, units=256):
    """Build the policy network that maps a flat observation to action logits.

    Args:
        input_shape: Shape of one observation without the batch axis, e.g.
            ``(observation_dim,)``. Only 1-D observations are supported.
        num_actions: Number of discrete actions; width of the logits output.
        num_heads: Number of heads in each ``MultiHeadAttention`` layer.
        num_layers: Number of attention + layer-normalization blocks stacked.
        units: ``key_dim`` of each attention head and width of the hidden
            dense layer.

    Returns:
        A Keras ``Model`` producing unnormalized logits of shape
        ``(batch, num_actions)``.
    """
    inputs = layers.Input(shape=input_shape)
    # GlobalAveragePooling1D requires a rank-3 (batch, sequence, features)
    # tensor, so the flat observation is presented as a length-1 sequence.
    # With a single token the attention weights are trivially 1; the blocks
    # then act as learned projections of the observation.
    x = layers.Reshape((1, input_shape[0]))(inputs)
    x = layers.LayerNormalization()(x)
    # Honor num_layers instead of always building exactly one block.
    for _ in range(num_layers):
        x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=units)(x, x)
        x = layers.LayerNormalization()(x)
    x = layers.GlobalAveragePooling1D()(x)  # collapse the sequence axis
    x = layers.Dense(units, activation='relu')(x)
    logits = layers.Dense(num_actions)(x)
    return Model(inputs=inputs, outputs=logits)

def build_transformer_critic(input_shape, num_heads=2, num_layers=2, units=256):
    """Build the value network that maps a flat observation to a scalar value.

    Args:
        input_shape: Shape of one observation without the batch axis, e.g.
            ``(observation_dim,)``. Only 1-D observations are supported.
        num_heads: Number of heads in each ``MultiHeadAttention`` layer.
        num_layers: Number of attention + layer-normalization blocks stacked.
        units: ``key_dim`` of each attention head.

    Returns:
        A Keras ``Model`` whose output has shape ``(batch, 1)``.
    """
    inputs = layers.Input(shape=input_shape)
    # GlobalAveragePooling1D requires a rank-3 (batch, sequence, features)
    # tensor, so the flat observation is presented as a length-1 sequence.
    x = layers.Reshape((1, input_shape[0]))(inputs)
    x = layers.LayerNormalization()(x)
    # Honor num_layers instead of always building exactly one block.
    for _ in range(num_layers):
        x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=units)(x, x)
        x = layers.LayerNormalization()(x)
    x = layers.GlobalAveragePooling1D()(x)  # collapse the sequence axis
    value = layers.Dense(1)(x)
    return Model(inputs=inputs, outputs=value)

# Initialize actor and critic models.
# Both are built for a flat observation of length observation_dim; the actor's
# last layer has num_actions outputs (logits) and the critic's last layer has a
# single output (value estimate). The module-level names `actor` and `critic`
# are what sample_action, train_policy, train_value_function and train_ppo read.
actor = build_transformer_actor(input_shape=(observation_dim,), num_actions=num_actions)
critic = build_transformer_critic(input_shape=(observation_dim,))

# Function for computing log-probabilities
def logprobabilities(logits, actions):
    """Return the log-probability of each taken action under ``logits``.

    Args:
        logits: Unnormalized action scores, one row per sample.
        actions: Integer action index for each row of ``logits``.

    Returns:
        A 1-D tensor with one log-probability per row.
    """
    log_probs_per_action = tf.nn.log_softmax(logits, axis=-1)
    # One-hot mask keeps only the entry belonging to the action actually taken.
    taken_mask = tf.one_hot(actions, num_actions)
    return tf.reduce_sum(taken_mask * log_probs_per_action, axis=1)

# Sample action from the actor
@tf.function
def sample_action(observation):
    """Run the actor on a batch of observations and draw one action per row.

    Returns:
        ``(logits, action)``: the actor output and a 1-D tensor of sampled
        action indices.
    """
    action_logits = actor(observation)
    # tf.random.categorical yields shape (batch, 1); drop the sample axis.
    drawn = tf.random.categorical(action_logits, 1)
    return action_logits, tf.squeeze(drawn, axis=1)

# PPO Policy Update (using clipped objective)
@tf.function
def train_policy(observation_buffer, action_buffer, logprobability_buffer, advantage_buffer):
    """Take one gradient step on the PPO clipped surrogate objective.

    Args:
        observation_buffer: Batch of observations collected by the old policy.
        action_buffer: Action indices taken for those observations.
        logprobability_buffer: Log-probabilities of those actions under the
            policy that collected them.
        advantage_buffer: Advantage estimate for each sample.

    Returns:
        Scalar tensor: mean of (old log-prob - new log-prob) over the batch,
        evaluated with the policy *after* the update. The caller uses it as an
        approximate KL divergence for early stopping.
    """
    with tf.GradientTape() as tape:
        logits = actor(observation_buffer)
        current_logprob = logprobabilities(logits, action_buffer)

        # Probability ratio new / old, computed in log space.
        ratio = tf.exp(current_logprob - logprobability_buffer)
        # Upper bound of the clipped surrogate: (1 + eps) * A for A > 0,
        # (1 - eps) * A otherwise.
        min_advantage = tf.where(
            advantage_buffer > 0,
            (1 + clip_ratio) * advantage_buffer,
            (1 - clip_ratio) * advantage_buffer
        )

        # PPO clipped objective (negated because the optimizer minimizes).
        policy_loss = -tf.reduce_mean(tf.minimum(ratio * advantage_buffer, min_advantage))

    policy_grads = tape.gradient(policy_loss, actor.trainable_variables)
    policy_optimizer.apply_gradients(zip(policy_grads, actor.trainable_variables))

    # Re-evaluate the policy after the step. Using the pre-update log-probs
    # would report the KL of the previous iteration (exactly 0 on the first
    # one), so early stopping would always react one update too late.
    updated_logprob = logprobabilities(actor(observation_buffer), action_buffer)
    kl = tf.reduce_mean(logprobability_buffer - updated_logprob)
    return kl

# PPO Value Function Update (MSE loss)
@tf.function
def train_value_function(observation_buffer, return_buffer):
    """Take one gradient step fitting the critic to the empirical returns.

    Args:
        observation_buffer: Batch of observations.
        return_buffer: 1-D array of discounted returns, one per observation.
    """
    with tf.GradientTape() as tape:
        # The critic ends in Dense(1), so its output is (batch, 1) while the
        # returns are (batch,). Without the squeeze the subtraction broadcasts
        # to (batch, batch) and every prediction is regressed towards every
        # return instead of its own.
        value_predictions = tf.squeeze(critic(observation_buffer), axis=-1)
        value_loss = tf.reduce_mean(tf.square(return_buffer - value_predictions))  # MSE loss

    value_grads = tape.gradient(value_loss, critic.trainable_variables)
    value_optimizer.apply_gradients(zip(value_grads, critic.trainable_variables))

# Discounted cumulative sums for advantages and returns
def discounted_cumulative_sums(x, discount):
    """Return the discounted suffix sums of ``x`` along axis 0.

    Element ``t`` of the result equals ``x[t] + discount * result[t + 1]``,
    with the last element equal to ``x[-1]``.
    """
    # Running the first-order recursive filter y[n] = x[n] + discount * y[n-1]
    # over the reversed input accumulates from the end of the sequence.
    time_reversed = x[::-1]
    accumulated = scipy.signal.lfilter([1], [1, float(-discount)], time_reversed, axis=0)
    return accumulated[::-1]

# Buffer Class for storing trajectories
class Buffer:
    """Fixed-size store for one epoch of on-policy rollout data.

    Steps are appended with ``store``; at the end of every trajectory
    ``finish_trajectory`` fills in advantages and returns for the steps added
    since the previous trajectory ended; ``get`` hands back the arrays for
    training and rewinds the write position.
    """

    def __init__(self, observation_dimensions, size, gamma=0.99, lam=0.95):
        """Allocate zeroed storage for ``size`` steps.

        Args:
            observation_dimensions: Length of one flat observation vector.
            size: Maximum number of steps stored between two ``get`` calls.
            gamma: Discount factor.
            lam: Extra decay applied on top of ``gamma`` when accumulating
                TD residuals into advantages (GAE lambda).
        """
        self.observation_buffer = np.zeros((size, observation_dimensions), dtype=np.float32)
        self.action_buffer = np.zeros(size, dtype=np.int32)
        self.advantage_buffer = np.zeros(size, dtype=np.float32)
        self.reward_buffer = np.zeros(size, dtype=np.float32)
        self.return_buffer = np.zeros(size, dtype=np.float32)
        self.value_buffer = np.zeros(size, dtype=np.float32)
        self.logprobability_buffer = np.zeros(size, dtype=np.float32)
        self.gamma, self.lam = gamma, lam
        # pointer: next free slot; trajectory_start_index: first slot of the
        # trajectory currently being recorded.
        self.pointer, self.trajectory_start_index = 0, 0

    def store(self, observation, action, reward, value, logprobability):
        """Append one environment step at the current write position.

        Raises:
            IndexError: If more than ``size`` steps are stored before ``get``.
        """
        self.observation_buffer[self.pointer] = observation
        self.action_buffer[self.pointer] = action
        self.reward_buffer[self.pointer] = reward
        self.value_buffer[self.pointer] = value
        self.logprobability_buffer[self.pointer] = logprobability
        self.pointer += 1

    def finish_trajectory(self, last_value=0):
        """Compute advantages and returns for the trajectory that just ended.

        Args:
            last_value: Value used to bootstrap beyond the final stored step;
                0 for a terminal state, otherwise a value estimate of the
                state the trajectory was cut off in.
        """
        path_slice = slice(self.trajectory_start_index, self.pointer)
        # Appending last_value lets the final step look one step ahead.
        rewards = np.append(self.reward_buffer[path_slice], last_value)
        values = np.append(self.value_buffer[path_slice], last_value)

        # One-step TD residuals: r_t + gamma * V(s_{t+1}) - V(s_t).
        deltas = rewards[:-1] + self.gamma * values[1:] - values[:-1]
        self.advantage_buffer[path_slice] = discounted_cumulative_sums(deltas, self.gamma * self.lam)
        # Discounted reward-to-go; the trailing bootstrap entry is dropped.
        self.return_buffer[path_slice] = discounted_cumulative_sums(rewards, self.gamma)[:-1]

        self.trajectory_start_index = self.pointer

    def get(self):
        """Return the collected arrays and rewind the buffer for the next epoch.

        Advantages are standardized to zero mean and unit standard deviation
        over the whole buffer before being returned.

        Returns:
            Tuple ``(observations, actions, advantages, returns,
            logprobabilities)``.
        """
        self.pointer, self.trajectory_start_index = 0, 0
        advantage_mean = np.mean(self.advantage_buffer)
        # Floor the divisor so a constant advantage buffer normalizes to zeros
        # instead of 0/0 = NaN, which would poison the policy gradients.
        advantage_std = max(np.std(self.advantage_buffer), 1e-8)
        self.advantage_buffer = (self.advantage_buffer - advantage_mean) / advantage_std
        return (
            self.observation_buffer,
            self.action_buffer,
            self.advantage_buffer,
            self.return_buffer,
            self.logprobability_buffer,
        )

# Training Loop
def _reset_observation(env):
    """Reset ``env`` and return only the observation, for old and new Gym APIs."""
    result = env.reset()
    # Gym >= 0.26 returns (observation, info); earlier versions return the
    # observation on its own.
    return result[0] if isinstance(result, tuple) else result


def _step_environment(env, action):
    """Step ``env`` and return ``(observation, reward, terminated, truncated)``.

    Accepts both the 5-tuple step API (terminated/truncated) and the older
    4-tuple API (single ``done`` flag).
    """
    result = env.step(action)
    if len(result) == 5:
        observation, reward, terminated, truncated, _ = result
    else:
        observation, reward, done, info = result
        # The old API folds time-limit truncation into `done` and reports it
        # through the info dict.
        truncated = bool(info.get("TimeLimit.truncated", False))
        terminated = bool(done) and not truncated
    return observation, reward, bool(terminated), bool(truncated)


def train_ppo(agent, env, epochs=1000, steps_per_epoch=2048, train_policy_iterations=80, train_value_iterations=80, target_kl=0.01):
    """Run the PPO collect-then-update loop and print per-epoch statistics.

    Relies on module-level ``buffer``, ``critic``, ``render``, ``sample_action``,
    ``logprobabilities``, ``train_policy`` and ``train_value_function``.

    Args:
        agent: Unused; kept so existing calls keep working.
        env: Gym environment with a discrete action space.
        epochs: Number of collect-then-update cycles.
        steps_per_epoch: Environment steps collected per epoch. Must not
            exceed the size the module-level ``buffer`` was created with.
        train_policy_iterations: Maximum policy gradient steps per epoch.
        train_value_iterations: Value-function gradient steps per epoch.
        target_kl: Policy updates stop early once the KL estimate exceeds
            1.5 times this value.
    """
    # The original bound the reset result to an unrelated name, leaving
    # `observation` undefined on the first step.
    observation = _reset_observation(env)
    episode_return = 0
    episode_length = 0

    for epoch in range(epochs):
        sum_return = 0
        sum_length = 0
        num_episodes = 0

        for t in range(steps_per_epoch):
            if render:
                env.render()

            # The models expect a leading batch axis.
            observation = np.asarray(observation, dtype=np.float32).reshape(1, -1)
            logits, action = sample_action(observation)
            observation_new, reward, terminated, truncated = _step_environment(env, action[0].numpy())
            episode_return += reward
            episode_length += 1

            value_t = critic(observation)
            logprobability_t = logprobabilities(logits, action)

            # Store plain scalars / a flat vector rather than size-1 tensors.
            buffer.store(
                observation[0],
                action[0].numpy(),
                reward,
                value_t.numpy().item(),
                logprobability_t.numpy().item(),
            )

            observation = observation_new

            epoch_ended = t == steps_per_epoch - 1
            if terminated or truncated or epoch_ended:
                if terminated:
                    last_value = 0
                else:
                    # Cut off by the time limit or the epoch boundary: the
                    # state is not terminal, so bootstrap from the critic.
                    cutoff_observation = np.asarray(observation, dtype=np.float32).reshape(1, -1)
                    last_value = critic(cutoff_observation).numpy().item()
                buffer.finish_trajectory(last_value)
                sum_return += episode_return
                sum_length += episode_length
                num_episodes += 1
                observation = _reset_observation(env)
                episode_return, episode_length = 0, 0

        # Get data from the buffer
        observation_buffer, action_buffer, advantage_buffer, return_buffer, logprobability_buffer = buffer.get()

        # Update the policy
        for _ in range(train_policy_iterations):
            kl = train_policy(observation_buffer, action_buffer, logprobability_buffer, advantage_buffer)
            if kl > 1.5 * target_kl:  # Early stopping based on KL divergence
                break

        # Update the value function
        for _ in range(train_value_iterations):
            train_value_function(observation_buffer, return_buffer)

        # num_episodes is at least 1: the last step of an epoch always closes
        # a trajectory.
        print(f"Epoch: {epoch + 1}. Mean Return: {sum_return / num_episodes}. Mean Length: {sum_length / num_episodes}")

# Train the PPO agent on CartPole; training starts as soon as this cell runs.
# `agent` is passed as None: train_ppo accepts the argument but never reads it
# and works with the module-level actor/critic instead.
train_ppo(agent=None, env=env)



  deprecation(
  deprecation(


NameError: name 'Buffer' is not defined

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model, optimizers
import gym
import scipy.signal

# Environment setup
env = gym.make("CartPole-v1")

# Hyperparameters
# NOTE(review): train_ppo constructs Buffer without passing gamma or lam, so
# the buffer uses its own defaults (0.99 / 0.95); the two values below only
# match those defaults by coincidence of being equal.
gamma = 0.99  # Discount factor
lam = 0.95  # GAE lambda
clip_ratio = 0.2  # PPO clipping ratio, read as a global by train_policy
epochs = 1000  # Default for train_ppo's `epochs` parameter
steps_per_epoch = 4000  # Environment steps per epoch and size of the buffer
train_policy_iterations = 80  # Maximum policy gradient steps per epoch
train_value_iterations = 80  # Value-function gradient steps per epoch
target_kl = 0.01  # Early stopping based on KL divergence

# Buffer for storing experiences
class Buffer:
    """Fixed-size store for one epoch of on-policy rollout data.

    Steps are appended with ``store``; at the end of every trajectory
    ``finish_trajectory`` fills in advantages and returns for the steps added
    since the previous trajectory ended; ``get`` hands back the arrays for
    training and rewinds the write position.
    """

    def __init__(self, observation_dimensions, size, gamma=0.99, lam=0.95):
        """Allocate zeroed storage for ``size`` steps.

        Args:
            observation_dimensions: Length of one flat observation vector.
            size: Maximum number of steps stored between two ``get`` calls.
            gamma: Discount factor.
            lam: Extra decay applied on top of ``gamma`` when accumulating
                TD residuals into advantages (GAE lambda).
        """
        self.observation_buffer = np.zeros((size, observation_dimensions), dtype=np.float32)
        self.action_buffer = np.zeros(size, dtype=np.int32)
        self.reward_buffer = np.zeros(size, dtype=np.float32)
        self.value_buffer = np.zeros(size, dtype=np.float32)
        self.logprobability_buffer = np.zeros(size, dtype=np.float32)
        self.advantage_buffer = np.zeros(size, dtype=np.float32)
        self.return_buffer = np.zeros(size, dtype=np.float32)
        self.gamma, self.lam = gamma, lam
        # pointer: next free slot; trajectory_start_index: first slot of the
        # trajectory currently being recorded.
        self.pointer, self.trajectory_start_index = 0, 0

    def store(self, observation, action, reward, value, logprobability):
        """Append one environment step at the current write position.

        Raises:
            IndexError: If more than ``size`` steps are stored before ``get``.
        """
        self.observation_buffer[self.pointer] = observation
        self.action_buffer[self.pointer] = action
        self.reward_buffer[self.pointer] = reward
        self.value_buffer[self.pointer] = value
        self.logprobability_buffer[self.pointer] = logprobability
        self.pointer += 1

    def finish_trajectory(self, last_value=0):
        """Compute advantages and returns for the trajectory that just ended.

        Args:
            last_value: Value used to bootstrap beyond the final stored step;
                0 for a terminal state, otherwise a value estimate of the
                state the trajectory was cut off in.
        """
        path_slice = slice(self.trajectory_start_index, self.pointer)
        # Appending last_value lets the final step look one step ahead.
        rewards = np.append(self.reward_buffer[path_slice], last_value)
        values = np.append(self.value_buffer[path_slice], last_value)
        # One-step TD residuals: r_t + gamma * V(s_{t+1}) - V(s_t).
        deltas = rewards[:-1] + self.gamma * values[1:] - values[:-1]
        self.advantage_buffer[path_slice] = discounted_cumulative_sums(deltas, self.gamma * self.lam)
        # Discounted reward-to-go; the trailing bootstrap entry is dropped.
        self.return_buffer[path_slice] = discounted_cumulative_sums(rewards, self.gamma)[:-1]
        self.trajectory_start_index = self.pointer

    def get(self):
        """Return the collected arrays and rewind the buffer for the next epoch.

        Advantages are standardized to zero mean and unit standard deviation
        over the whole buffer before being returned.

        Returns:
            Tuple ``(observations, actions, advantages, returns,
            logprobabilities)``.
        """
        self.pointer, self.trajectory_start_index = 0, 0
        advantage_mean = np.mean(self.advantage_buffer)
        # Floor the divisor so a constant advantage buffer normalizes to zeros
        # instead of 0/0 = NaN, which would poison the policy gradients.
        advantage_std = max(np.std(self.advantage_buffer), 1e-8)
        self.advantage_buffer = (self.advantage_buffer - advantage_mean) / advantage_std
        return (self.observation_buffer, self.action_buffer, self.advantage_buffer, self.return_buffer, self.logprobability_buffer)


# Discounted cumulative sum for rewards
def discounted_cumulative_sums(x, discount):
    """Return the discounted suffix sums of ``x`` along axis 0.

    Element ``t`` of the result equals ``x[t] + discount * result[t + 1]``,
    with the last element equal to ``x[-1]``.
    """
    # Running the first-order recursive filter y[n] = x[n] + discount * y[n-1]
    # over the reversed input accumulates from the end of the sequence.
    backwards = x[::-1]
    filtered = scipy.signal.lfilter([1], [1, float(-discount)], backwards, axis=0)
    return filtered[::-1]


# Transformer Actor model
def build_transformer_actor(input_shape, num_actions, num_heads=2, num_layers=2, units=256):
    """Build the policy network that maps a flat observation to action logits.

    Args:
        input_shape: Shape of one observation without the batch axis, e.g.
            ``(4,)``. Only 1-D observations are supported.
        num_actions: Number of discrete actions; width of the logits output.
        num_heads: Number of heads in each ``MultiHeadAttention`` layer.
        num_layers: Number of attention + layer-normalization blocks stacked.
        units: ``key_dim`` of each attention head.

    Returns:
        A Keras ``Model`` producing unnormalized logits of shape
        ``(batch, num_actions)``.
    """
    inputs = layers.Input(shape=input_shape)
    # GlobalAveragePooling1D rejects the rank-2 (batch, features) tensor
    # ("expected ndim=3, found ndim=2"), so the flat observation is presented
    # as a length-1 sequence: (batch, 1, features).
    x = layers.Reshape((1, input_shape[0]))(inputs)
    x = layers.LayerNormalization()(x)
    for _ in range(num_layers):
        x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=units)(x, x)
        x = layers.LayerNormalization()(x)
    x = layers.GlobalAveragePooling1D()(x)  # collapse the sequence axis
    logits = layers.Dense(num_actions)(x)
    return Model(inputs=inputs, outputs=logits)


# Transformer Critic model
def build_transformer_critic(input_shape, units=256, num_heads=2, num_layers=2):
    """Build the value network that maps a flat observation to a scalar value.

    Args:
        input_shape: Shape of one observation without the batch axis, e.g.
            ``(4,)``. Only 1-D observations are supported.
        units: ``key_dim`` of each attention head.
        num_heads: Number of heads in each ``MultiHeadAttention`` layer.
        num_layers: Number of attention + layer-normalization blocks stacked.

    Returns:
        A Keras ``Model`` whose output has shape ``(batch, 1)``.
    """
    inputs = layers.Input(shape=input_shape)
    # GlobalAveragePooling1D rejects the rank-2 (batch, features) tensor
    # ("expected ndim=3, found ndim=2"), so the flat observation is presented
    # as a length-1 sequence: (batch, 1, features).
    x = layers.Reshape((1, input_shape[0]))(inputs)
    x = layers.LayerNormalization()(x)
    for _ in range(num_layers):
        x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=units)(x, x)
        x = layers.LayerNormalization()(x)
    x = layers.GlobalAveragePooling1D()(x)  # collapse the sequence axis
    value = layers.Dense(1)(x)
    return Model(inputs=inputs, outputs=value)


# Log-probabilities computation for discrete actions
def logprobabilities(logits, a):
    """Return the log-probability of each taken action under ``logits``.

    Args:
        logits: Unnormalized action scores, one row per sample.
        a: Integer action index for each row of ``logits``.

    Returns:
        A 1-D tensor with one log-probability per row.
    """
    logprobabilities_all = tf.math.log_softmax(logits)
    # Take the one-hot depth from the logits themselves: in this cell
    # `num_actions` is only bound as a local inside train_ppo, so reading it
    # here as a global raises NameError on a fresh kernel.
    action_mask = tf.one_hot(a, tf.shape(logits)[-1])
    logprobability = tf.reduce_sum(action_mask * logprobabilities_all, axis=1)
    return logprobability


# Sample action from actor
@tf.function
def sample_action(observation):
    """Run the actor on a batch of observations and draw one action per row.

    Returns:
        ``(logits, action)``: the actor output and a 1-D tensor of sampled
        action indices.
    """
    policy_logits = actor(observation)
    # tf.random.categorical yields shape (batch, 1); drop the sample axis.
    samples = tf.random.categorical(policy_logits, 1)
    chosen = tf.squeeze(samples, axis=1)
    return policy_logits, chosen


# PPO Policy Gradient Update
@tf.function
def train_policy(observation_buffer, action_buffer, logprobability_buffer, advantage_buffer):
    """Take one gradient step on the PPO clipped surrogate objective.

    Returns:
        Scalar tensor: mean of (old log-prob - new log-prob) over the batch,
        evaluated after the update; used by the caller for early stopping.
    """
    with tf.GradientTape() as tape:
        new_logprob = logprobabilities(actor(observation_buffer), action_buffer)
        # Probability ratio new / old, computed in log space.
        ratio = tf.exp(new_logprob - logprobability_buffer)
        # Upper bound of the clipped surrogate for each sample.
        clipped_bound = tf.where(
            advantage_buffer > 0,
            (1 + clip_ratio) * advantage_buffer,
            (1 - clip_ratio) * advantage_buffer,
        )
        surrogate = tf.minimum(ratio * advantage_buffer, clipped_bound)
        policy_loss = -tf.reduce_mean(surrogate)

    actor_variables = actor.trainable_variables
    policy_grads = tape.gradient(policy_loss, actor_variables)
    policy_optimizer.apply_gradients(zip(policy_grads, actor_variables))

    # Second forward pass: measures how far the updated policy has moved.
    post_update_logprob = logprobabilities(actor(observation_buffer), action_buffer)
    return tf.reduce_mean(logprobability_buffer - post_update_logprob)


# Value Function Update
@tf.function
def train_value_function(observation_buffer, return_buffer):
    """Take one gradient step fitting the critic to the empirical returns.

    Args:
        observation_buffer: Batch of observations.
        return_buffer: 1-D array of discounted returns, one per observation.
    """
    with tf.GradientTape() as tape:
        # The critic ends in Dense(1), so its output is (batch, 1) while the
        # returns are (batch,). Without the squeeze the subtraction broadcasts
        # to (batch, batch) and every prediction is regressed towards every
        # return instead of its own.
        value_predictions = tf.squeeze(critic(observation_buffer), axis=-1)
        value_loss = tf.reduce_mean((return_buffer - value_predictions) ** 2)

    value_grads = tape.gradient(value_loss, critic.trainable_variables)
    value_optimizer.apply_gradients(zip(value_grads, critic.trainable_variables))


# Training loop for PPO agent
def _reset_observation(env):
    """Reset ``env`` and return only the observation, for old and new Gym APIs."""
    result = env.reset()
    # Gym >= 0.26 returns (observation, info); earlier versions return the
    # observation on its own.
    return result[0] if isinstance(result, tuple) else result


def _step_environment(env, action):
    """Step ``env`` and return ``(observation, reward, terminated, truncated)``.

    Accepts both the 5-tuple step API (terminated/truncated) and the older
    4-tuple API (single ``done`` flag).
    """
    result = env.step(action)
    if len(result) == 5:
        observation, reward, terminated, truncated, _ = result
    else:
        observation, reward, done, info = result
        # The old API folds time-limit truncation into `done` and reports it
        # through the info dict.
        truncated = bool(info.get("TimeLimit.truncated", False))
        terminated = bool(done) and not truncated
    return observation, reward, bool(terminated), bool(truncated)


def train_ppo(agent, env, epochs=epochs):
    """Build the actor/critic, then run the PPO collect-then-update loop.

    The models and optimizers are published as module globals because
    ``sample_action``, ``train_policy`` and ``train_value_function`` look them
    up by name. ``steps_per_epoch``, ``train_policy_iterations``,
    ``train_value_iterations`` and ``target_kl`` are read from module scope.

    Args:
        agent: Unused; kept so existing calls keep working.
        env: Gym environment with a flat observation vector and a discrete
            action space.
        epochs: Number of collect-then-update cycles.
    """
    # num_actions is published as well so that module-level helpers that
    # refer to it by name can resolve it.
    global actor, critic, policy_optimizer, value_optimizer, num_actions
    observation_shape = env.observation_space.shape
    num_actions = env.action_space.n

    # Initialize models, optimizers, and buffers
    actor = build_transformer_actor(observation_shape, num_actions)
    critic = build_transformer_critic(observation_shape)
    policy_optimizer = optimizers.Adam(learning_rate=3e-4)
    value_optimizer = optimizers.Adam(learning_rate=1e-3)

    buffer = Buffer(observation_shape[0], size=steps_per_epoch)

    # The running episode statistics must exist before the first `+=`.
    episode_return, episode_length = 0, 0
    observation = _reset_observation(env)

    # Train loop
    for epoch in range(epochs):
        sum_return = 0
        sum_length = 0
        num_episodes = 0

        for t in range(steps_per_epoch):
            # The models expect a leading batch axis.
            observation = np.asarray(observation, dtype=np.float32).reshape(1, -1)
            logits, action = sample_action(observation)
            observation_new, reward, terminated, truncated = _step_environment(env, action.numpy()[0])
            episode_return += reward
            episode_length += 1

            # Get value and log-probability of the action
            value_t = critic(observation)
            logprobability_t = logprobabilities(logits, action)

            # Store plain scalars / a flat vector rather than size-1 tensors.
            buffer.store(
                observation[0],
                action.numpy()[0],
                reward,
                value_t.numpy().item(),
                logprobability_t.numpy().item(),
            )

            # Update observation
            observation = observation_new

            epoch_ended = t == steps_per_epoch - 1
            if terminated or truncated or epoch_ended:
                if terminated:
                    last_value = 0
                else:
                    # Cut off by the time limit or the epoch boundary: the
                    # state is not terminal, so bootstrap from the critic.
                    cutoff_observation = np.asarray(observation, dtype=np.float32).reshape(1, -1)
                    last_value = critic(cutoff_observation).numpy().item()
                buffer.finish_trajectory(last_value)
                sum_return += episode_return
                sum_length += episode_length
                num_episodes += 1
                observation = _reset_observation(env)
                episode_return, episode_length = 0, 0

        # Get data from the buffer
        (
            observation_buffer,
            action_buffer,
            advantage_buffer,
            return_buffer,
            logprobability_buffer,
        ) = buffer.get()

        # Update policy with PPO
        for _ in range(train_policy_iterations):
            kl = train_policy(observation_buffer, action_buffer, logprobability_buffer, advantage_buffer)
            if kl > 1.5 * target_kl:  # Early stopping based on KL divergence
                break

        # Update value function
        for _ in range(train_value_iterations):
            train_value_function(observation_buffer, return_buffer)

        # num_episodes is at least 1: the last step of an epoch always closes
        # a trajectory.
        print(f"Epoch: {epoch + 1}. Mean Return: {sum_return / num_episodes}. Mean Length: {sum_length / num_episodes}")


# Train the PPO agent on the CartPole environment; training starts as soon as
# this cell runs. `agent` is passed as None: train_ppo accepts the argument but
# never reads it, building its own actor and critic instead.
train_ppo(agent=None, env=env)



ValueError: Input 0 of layer "global_average_pooling1d" is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 4)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model, optimizers
import gym
import scipy.signal

# Environment setup
env = gym.make("CartPole-v1")

# Hyperparameters
# NOTE(review): train_ppo constructs Buffer without passing gamma or lam, so
# the buffer uses its own defaults (0.99 / 0.95), which equal the values below.
gamma = 0.99  # Discount factor
lam = 0.95  # GAE lambda
clip_ratio = 0.2  # PPO clipping ratio, read as a global by train_policy
epochs = 1000  # Default for train_ppo's `epochs` parameter
steps_per_epoch = 4000  # Environment steps per epoch and size of the buffer
train_policy_iterations = 80  # Maximum policy gradient steps per epoch
train_value_iterations = 80  # Value-function gradient steps per epoch
target_kl = 0.01  # Early stopping based on KL divergence

# Initialize episode return and length
# NOTE(review): train_ppo updates both names with `+=` and has no `global`
# declaration for them, so inside the function they are locals; these
# module-level values are never read there and do not prevent the
# UnboundLocalError on the first `episode_return += reward`.
episode_return = 0
episode_length = 0

# Buffer for storing experiences
class Buffer:
    """Fixed-size store for one epoch of on-policy rollout data.

    Steps are appended with ``store``; at the end of every trajectory
    ``finish_trajectory`` fills in advantages and returns for the steps added
    since the previous trajectory ended; ``get`` hands back the arrays for
    training and rewinds the write position.
    """

    def __init__(self, observation_dimensions, size, gamma=0.99, lam=0.95):
        """Allocate zeroed storage for ``size`` steps.

        Args:
            observation_dimensions: Length of one flat observation vector.
            size: Maximum number of steps stored between two ``get`` calls.
            gamma: Discount factor.
            lam: Extra decay applied on top of ``gamma`` when accumulating
                TD residuals into advantages (GAE lambda).
        """
        self.observation_buffer = np.zeros((size, observation_dimensions), dtype=np.float32)
        self.action_buffer = np.zeros(size, dtype=np.int32)
        self.reward_buffer = np.zeros(size, dtype=np.float32)
        self.value_buffer = np.zeros(size, dtype=np.float32)
        self.logprobability_buffer = np.zeros(size, dtype=np.float32)
        self.advantage_buffer = np.zeros(size, dtype=np.float32)
        self.return_buffer = np.zeros(size, dtype=np.float32)
        self.gamma, self.lam = gamma, lam
        # pointer: next free slot; trajectory_start_index: first slot of the
        # trajectory currently being recorded.
        self.pointer, self.trajectory_start_index = 0, 0

    def store(self, observation, action, reward, value, logprobability):
        """Append one environment step at the current write position.

        Raises:
            IndexError: If more than ``size`` steps are stored before ``get``.
        """
        self.observation_buffer[self.pointer] = observation
        self.action_buffer[self.pointer] = action
        self.reward_buffer[self.pointer] = reward
        self.value_buffer[self.pointer] = value
        self.logprobability_buffer[self.pointer] = logprobability
        self.pointer += 1

    def finish_trajectory(self, last_value=0):
        """Compute advantages and returns for the trajectory that just ended.

        Args:
            last_value: Value used to bootstrap beyond the final stored step;
                0 for a terminal state, otherwise a value estimate of the
                state the trajectory was cut off in.
        """
        path_slice = slice(self.trajectory_start_index, self.pointer)
        # Appending last_value lets the final step look one step ahead.
        rewards = np.append(self.reward_buffer[path_slice], last_value)
        values = np.append(self.value_buffer[path_slice], last_value)
        # One-step TD residuals: r_t + gamma * V(s_{t+1}) - V(s_t).
        deltas = rewards[:-1] + self.gamma * values[1:] - values[:-1]
        self.advantage_buffer[path_slice] = discounted_cumulative_sums(deltas, self.gamma * self.lam)
        # Discounted reward-to-go; the trailing bootstrap entry is dropped.
        self.return_buffer[path_slice] = discounted_cumulative_sums(rewards, self.gamma)[:-1]
        self.trajectory_start_index = self.pointer

    def get(self):
        """Return the collected arrays and rewind the buffer for the next epoch.

        Advantages are standardized to zero mean and unit standard deviation
        over the whole buffer before being returned.

        Returns:
            Tuple ``(observations, actions, advantages, returns,
            logprobabilities)``.
        """
        self.pointer, self.trajectory_start_index = 0, 0
        advantage_mean = np.mean(self.advantage_buffer)
        # Floor the divisor so a constant advantage buffer normalizes to zeros
        # instead of 0/0 = NaN, which would poison the policy gradients.
        advantage_std = max(np.std(self.advantage_buffer), 1e-8)
        self.advantage_buffer = (self.advantage_buffer - advantage_mean) / advantage_std
        return (self.observation_buffer, self.action_buffer, self.advantage_buffer, self.return_buffer, self.logprobability_buffer)


# Discounted cumulative sum for rewards
def discounted_cumulative_sums(x, discount):
    """Return the discounted suffix sums of ``x`` along axis 0.

    Element ``t`` of the result equals ``x[t] + discount * result[t + 1]``,
    with the last element equal to ``x[-1]``.
    """
    numerator = [1]
    denominator = [1, float(-discount)]
    # The recursive filter y[n] = x[n] + discount * y[n-1], applied to the
    # reversed input, accumulates from the end of the sequence.
    reversed_sums = scipy.signal.lfilter(numerator, denominator, x[::-1], axis=0)
    return reversed_sums[::-1]


# Transformer Actor model
def build_transformer_actor(input_shape, num_actions, num_heads=2, num_layers=2, units=256):
    """Build the policy network that maps a flat observation to action logits.

    Args:
        input_shape: Shape of one observation without the batch axis; its
            first entry is used as the feature count.
        num_actions: Width of the logits output.
        num_heads: Number of heads in each ``MultiHeadAttention`` layer.
        num_layers: Number of attention + layer-normalization blocks stacked.
        units: ``key_dim`` of each attention head.

    Returns:
        A Keras ``Model`` from observations to unnormalized action logits.
    """
    observation_in = layers.Input(shape=input_shape)
    # Present the flat observation as a length-1 sequence: (batch, 1, features).
    as_sequence = layers.Reshape((1, input_shape[0]))(observation_in)
    hidden = layers.LayerNormalization()(as_sequence)
    for _block in range(num_layers):
        attended = layers.MultiHeadAttention(num_heads=num_heads, key_dim=units)(hidden, hidden)
        hidden = layers.LayerNormalization()(attended)
    pooled = layers.GlobalAveragePooling1D()(hidden)  # average over the sequence axis
    action_logits = layers.Dense(num_actions)(pooled)
    return Model(inputs=observation_in, outputs=action_logits)


# Transformer Critic model
def build_transformer_critic(input_shape, units=256, num_heads=2, num_layers=2):
    """Build the value network that maps a flat observation to a scalar value.

    Args:
        input_shape: Shape of one observation without the batch axis; its
            first entry is used as the feature count.
        units: ``key_dim`` of each attention head.
        num_heads: Number of heads in each ``MultiHeadAttention`` layer.
        num_layers: Number of attention + layer-normalization blocks stacked.

    Returns:
        A Keras ``Model`` whose final layer is ``Dense(1)``.
    """
    observation_in = layers.Input(shape=input_shape)
    # Present the flat observation as a length-1 sequence: (batch, 1, features).
    as_sequence = layers.Reshape((1, input_shape[0]))(observation_in)
    hidden = layers.LayerNormalization()(as_sequence)
    for _block in range(num_layers):
        attended = layers.MultiHeadAttention(num_heads=num_heads, key_dim=units)(hidden, hidden)
        hidden = layers.LayerNormalization()(attended)
    pooled = layers.GlobalAveragePooling1D()(hidden)  # average over the sequence axis
    state_value = layers.Dense(1)(pooled)
    return Model(inputs=observation_in, outputs=state_value)


# Log-probabilities computation for discrete actions
def logprobabilities(logits, a):
    """Return the log-probability of each taken action under ``logits``.

    Args:
        logits: Unnormalized action scores, one row per sample.
        a: Integer action index for each row of ``logits``.

    Returns:
        A 1-D tensor with one log-probability per row.
    """
    logprobabilities_all = tf.math.log_softmax(logits)
    # Take the one-hot depth from the logits themselves: in this cell
    # `num_actions` is only bound as a local inside train_ppo, so reading it
    # here as a global raises NameError on a fresh kernel.
    action_mask = tf.one_hot(a, tf.shape(logits)[-1])
    logprobability = tf.reduce_sum(action_mask * logprobabilities_all, axis=1)
    return logprobability


# Sample action from actor
@tf.function
def sample_action(observation):
    """Run the actor on a batch of observations and draw one action per row.

    Returns:
        ``(logits, action)``: the actor output and a 1-D tensor of sampled
        action indices.
    """
    actor_output = actor(observation)
    # One categorical draw per row gives shape (batch, 1); squeeze it to (batch,).
    return actor_output, tf.squeeze(tf.random.categorical(actor_output, 1), axis=1)


# PPO Policy Gradient Update
@tf.function
def train_policy(observation_buffer, action_buffer, logprobability_buffer, advantage_buffer):
    """Take one gradient step on the PPO clipped surrogate objective.

    Returns:
        Scalar tensor: mean of (old log-prob - new log-prob) over the batch,
        evaluated after the update; used by the caller for early stopping.
    """
    upper_scale = 1 + clip_ratio
    lower_scale = 1 - clip_ratio
    with tf.GradientTape() as tape:
        logprob_now = logprobabilities(actor(observation_buffer), action_buffer)
        # Probability ratio new / old, computed in log space.
        ratio = tf.exp(logprob_now - logprobability_buffer)
        # Upper bound of the clipped surrogate for each sample.
        bound = tf.where(
            advantage_buffer > 0,
            upper_scale * advantage_buffer,
            lower_scale * advantage_buffer,
        )
        policy_loss = -tf.reduce_mean(tf.minimum(ratio * advantage_buffer, bound))

    weights = actor.trainable_variables
    policy_optimizer.apply_gradients(zip(tape.gradient(policy_loss, weights), weights))

    # Second forward pass: measures how far the updated policy has moved.
    logprob_after = logprobabilities(actor(observation_buffer), action_buffer)
    kl = tf.reduce_mean(logprobability_buffer - logprob_after)
    return kl


# Value Function Update
@tf.function
def train_value_function(observation_buffer, return_buffer):
    """Take one gradient step fitting the critic to the empirical returns.

    Args:
        observation_buffer: Batch of observations.
        return_buffer: 1-D array of discounted returns, one per observation.
    """
    with tf.GradientTape() as tape:
        # The critic ends in Dense(1), so its output is (batch, 1) while the
        # returns are (batch,). Without the squeeze the subtraction broadcasts
        # to (batch, batch) and every prediction is regressed towards every
        # return instead of its own.
        value_predictions = tf.squeeze(critic(observation_buffer), axis=-1)
        value_loss = tf.reduce_mean((return_buffer - value_predictions) ** 2)

    value_grads = tape.gradient(value_loss, critic.trainable_variables)
    value_optimizer.apply_gradients(zip(value_grads, critic.trainable_variables))


# Training loop for PPO agent
def _reset_observation(env):
    """Reset ``env`` and return only the observation, for old and new Gym APIs."""
    result = env.reset()
    # Gym >= 0.26 returns (observation, info); earlier versions return the
    # observation on its own.
    return result[0] if isinstance(result, tuple) else result


def _step_environment(env, action):
    """Step ``env`` and return ``(observation, reward, terminated, truncated)``.

    Accepts both the 5-tuple step API (terminated/truncated) and the older
    4-tuple API (single ``done`` flag).
    """
    result = env.step(action)
    if len(result) == 5:
        observation, reward, terminated, truncated, _ = result
    else:
        observation, reward, done, info = result
        # The old API folds time-limit truncation into `done` and reports it
        # through the info dict.
        truncated = bool(info.get("TimeLimit.truncated", False))
        terminated = bool(done) and not truncated
    return observation, reward, bool(terminated), bool(truncated)


def train_ppo(agent, env, epochs=epochs):
    """Build the actor/critic, then run the PPO collect-then-update loop.

    The models and optimizers are published as module globals because
    ``sample_action``, ``train_policy`` and ``train_value_function`` look them
    up by name. ``steps_per_epoch``, ``train_policy_iterations``,
    ``train_value_iterations`` and ``target_kl`` are read from module scope.

    Args:
        agent: Unused; kept so existing calls keep working.
        env: Gym environment with a flat observation vector and a discrete
            action space.
        epochs: Number of collect-then-update cycles.
    """
    # num_actions is published as well so that module-level helpers that
    # refer to it by name can resolve it.
    global actor, critic, policy_optimizer, value_optimizer, num_actions
    observation_shape = env.observation_space.shape
    num_actions = env.action_space.n

    # Initialize models, optimizers, and buffers
    actor = build_transformer_actor(observation_shape, num_actions)
    critic = build_transformer_critic(observation_shape)
    policy_optimizer = optimizers.Adam(learning_rate=3e-4)
    value_optimizer = optimizers.Adam(learning_rate=1e-3)

    buffer = Buffer(observation_shape[0], size=steps_per_epoch)

    # `episode_return += ...` makes these names local to this function, so
    # they must be initialized here; module-level variables of the same name
    # are not visible to the augmented assignment.
    episode_return, episode_length = 0, 0
    observation = _reset_observation(env)

    # Train loop
    for epoch in range(epochs):
        sum_return = 0
        sum_length = 0
        num_episodes = 0

        for t in range(steps_per_epoch):
            # The models expect a leading batch axis.
            observation = np.asarray(observation, dtype=np.float32).reshape(1, -1)
            logits, action = sample_action(observation)
            observation_new, reward, terminated, truncated = _step_environment(env, action.numpy()[0])
            episode_return += reward
            episode_length += 1

            # Get value and log-probability of the action
            value_t = critic(observation)
            logprobability_t = logprobabilities(logits, action)

            # Store plain scalars / a flat vector rather than size-1 tensors.
            buffer.store(
                observation[0],
                action.numpy()[0],
                reward,
                value_t.numpy().item(),
                logprobability_t.numpy().item(),
            )

            # Update observation
            observation = observation_new

            epoch_ended = t == steps_per_epoch - 1
            if terminated or truncated or epoch_ended:
                if terminated:
                    last_value = 0
                else:
                    # Cut off by the time limit or the epoch boundary: the
                    # state is not terminal, so bootstrap from the critic.
                    cutoff_observation = np.asarray(observation, dtype=np.float32).reshape(1, -1)
                    last_value = critic(cutoff_observation).numpy().item()
                buffer.finish_trajectory(last_value)
                sum_return += episode_return
                sum_length += episode_length
                num_episodes += 1
                observation = _reset_observation(env)
                episode_return, episode_length = 0, 0

        # Get data from the buffer
        (
            observation_buffer,
            action_buffer,
            advantage_buffer,
            return_buffer,
            logprobability_buffer,
        ) = buffer.get()

        # Update policy with PPO
        for _ in range(train_policy_iterations):
            kl = train_policy(observation_buffer, action_buffer, logprobability_buffer, advantage_buffer)
            if kl > 1.5 * target_kl:  # Early stopping based on KL divergence
                break

        # Update value function
        for _ in range(train_value_iterations):
            train_value_function(observation_buffer, return_buffer)

        # num_episodes is at least 1: the last step of an epoch always closes
        # a trajectory.
        print(f"Epoch {epoch + 1} - Mean Return: {sum_return / num_episodes}, Mean Length: {sum_length / num_episodes}")

# Start training the PPO agent; training begins as soon as this cell runs.
# `agent` is passed as None: train_ppo accepts the argument but never reads it,
# building its own actor and critic instead.
train_ppo(agent=None, env=env)


  deprecation(
  deprecation(
  if not isinstance(terminated, (bool, np.bool8)):


UnboundLocalError: local variable 'episode_return' referenced before assignment