In [13]:
import gym
import tensorflow as tf
import numpy as np

# Define your custom softmax and KLD functions
def my_softmax(x):
    """Apply the Keras softmax activation to ``x`` along its last axis.

    This function is passed to ``load_model`` under the key ``'my_softmax'``
    so the saved model can resolve the custom activation by name.

    Args:
        x: Tensor of scores to normalise.

    Returns:
        The result of ``tf.keras.activations.softmax(x, axis=-1)``.
    """
    softmax_fn = tf.keras.activations.softmax
    return softmax_fn(x, axis=-1)

def my_kld(y_true, y_pred):
    """Kullback-Leibler divergence of ``y_pred`` from ``y_true``.

    Both tensors are clipped to ``[1e-10, 1]`` before the logarithm so that
    zeros cannot produce ``log(0)`` or a division by zero. The element-wise
    terms ``p * log(p / q)`` are then summed over axes 1, 2 and 3, so the
    inputs must have at least four dimensions.

    Args:
        y_true: Target distribution tensor.
        y_pred: Predicted distribution tensor.

    Returns:
        Tensor holding the divergence reduced over axes 1, 2 and 3.
    """
    floor = 1e-10
    target = tf.clip_by_value(y_true, floor, 1)
    estimate = tf.clip_by_value(y_pred, floor, 1)
    log_ratio = tf.math.log(target / estimate)
    return tf.reduce_sum(target * log_ratio, axis=[1, 2, 3])

# Load the trained AGIL model. The custom activation and loss must be supplied
# by name so Keras can deserialize the layers/compile config that reference them.
model_path = r"./model.hdf5"
model = tf.keras.models.load_model(
    model_path,
    custom_objects={'my_softmax': my_softmax, 'my_kld': my_kld},
)

# Inspect model input and output specifications
# (loop variable is not called `input`, to avoid shadowing the builtin).
print("Model Inputs:", model.inputs)
print("Model Input Shapes:", [tensor.shape for tensor in model.inputs])
print("Model Outputs:", model.outputs)
print("Model Output Shapes:", [tensor.shape for tensor in model.outputs])

# Initialize the Breakout environment with render_mode='human'.
# The model's policy head emits 18 probabilities (see the printed output
# shape), which corresponds to the full ALE action set; Breakout exposes only a
# reduced action set by default, so request the full one to keep every argmax
# index a valid action.
# NOTE(review): confirm Breakout is the game this model was trained on.
# NOTE: render_mode='human' needs a working display; on a headless machine it
# fails with "RuntimeError: Failed to initialize SDL" -- use
# render_mode='rgb_array' there instead.
env = gym.make("ALE/Breakout-v5", render_mode='human', full_action_space=True)
obs = env.reset()

def preprocess_observation(observation):
    """Turn a raw environment observation into a single-frame model input batch.

    A tuple observation is reduced to its first element before conversion.
    The frame is converted to grayscale, resized to 84x84, cast to float32,
    divided by 255 and given a leading batch axis.

    Args:
        observation: A 3-D image array, or a tuple whose first element is one.

    Returns:
        A float32 tensor with a leading batch dimension of size 1.

    Raises:
        ValueError: If the observation is not 3-dimensional once converted
            to a NumPy array.
    """
    raw = observation[0] if isinstance(observation, tuple) else observation
    frame = np.array(raw)

    # Guard clause: only 3-D image frames are supported.
    if frame.ndim != 3:
        raise ValueError("Observation has an unexpected shape.")

    gray = tf.image.rgb_to_grayscale(frame)
    resized = tf.image.resize(gray, [84, 84])
    scaled = tf.cast(resized, tf.float32) / 255.0
    return tf.expand_dims(scaled, axis=0)

def select_action(model, observation):
    """Pick the greedy action for ``observation`` according to ``model``.

    The observation is preprocessed into a (1, 84, 84, 1) frame and fed to the
    model together with an all-zero placeholder for the model's second input.

    ``model.predict`` returns a single array for a single-output model and a
    list of arrays for a multi-output model. Both cases are handled: for a
    list with more than one entry the second entry is used (the original
    assumption of this code); otherwise the only output is used. Indexing a
    single-output prediction with ``[1]`` would instead select a non-existent
    second batch row and raise ``IndexError``.

    Args:
        model: Keras model taking two (batch, 84, 84, 1) inputs.
        observation: Raw environment observation accepted by
            ``preprocess_observation``.

    Returns:
        int: Index of the highest-probability action for the first (only)
        batch element.

    Raises:
        ValueError: If the action-probability output is not 2-D
            ``[batch_size, num_actions]``.
    """
    processed_obs = preprocess_observation(observation)

    # Placeholder for the second model input.
    # NOTE(review): presumably the gaze/attention channel -- confirm the input
    # order and whether zeros are an acceptable stand-in for this model.
    additional_input = np.zeros((1, 84, 84, 1), dtype=np.float32)

    inputs = [processed_obs, additional_input]

    # verbose=0 suppresses the per-call progress bar.
    outputs = model.predict(inputs, verbose=0)

    # Normalise the prediction to the action-probability array.
    if isinstance(outputs, (list, tuple)):
        action_probs = outputs[1] if len(outputs) > 1 else outputs[0]
    else:
        action_probs = outputs
    action_probs = np.asarray(action_probs)

    # Expect [batch_size, num_actions].
    if action_probs.ndim != 2:
        raise ValueError("Unexpected shape for action probabilities.")

    # Take the first batch element; return a plain Python int.
    return int(np.argmax(action_probs[0]))


# Number of evaluation episodes to play.
NUM_EPISODES = 10

try:
    for episode in range(NUM_EPISODES):
        obs = env.reset()
        total_reward = 0
        done = False

        while not done:
            # No explicit env.render() call: the environment was created with
            # render_mode='human', which displays frames as the env is stepped.
            action = select_action(model, obs)

            # Newer Gym versions return (obs, reward, terminated, truncated,
            # info); older ones return (obs, reward, done, info). Unpacking
            # the 5-tuple into four names raises ValueError, so handle both.
            step_result = env.step(action)
            if len(step_result) == 5:
                obs, reward, terminated, truncated, info = step_result
                done = terminated or truncated
            else:
                obs, reward, done, info = step_result

            total_reward += reward

        print(f"Episode {episode + 1}: Total Reward = {total_reward}")
finally:
    # Always release the emulator/window, even if an episode raised.
    env.close()


Model Inputs: [<KerasTensor: shape=(None, 84, 84, 1) dtype=float32 (created by layer 'input_2')>, <KerasTensor: shape=(None, 84, 84, 1) dtype=float32 (created by layer 'input_1')>]
Model Input Shapes: [TensorShape([None, 84, 84, 1]), TensorShape([None, 84, 84, 1])]
Model Outputs: [<KerasTensor: shape=(None, 18) dtype=float32 (created by layer 'prob')>]
Model Output Shapes: [TensorShape([None, 18])]


RuntimeError: Failed to initialize SDL