# In [None]:
import gym_super_mario_bros
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.env_util import make_vec_env

# Set the random seed for reproducibility
set_random_seed(42)

# Number of gameplay episodes to play (and record) with the trained model
n_episodes = 3

# Maximum length of each played episode, in environment steps
episode_duration = 500

# Create the Super Mario Bros environment inside a DummyVecEnv. The factory
# builds the environment itself instead of closing over a module-level name
# that is rebound on the very same line.
env = DummyVecEnv([lambda: gym_super_mario_bros.make('SuperMarioBros-1-1-v0')])

# Create an A2C model
# NOTE(review): the observations are presumably raw image frames, for which
# 'CnnPolicy' is usually the better fit -- confirm before changing the policy.
model = A2C('MlpPolicy', env, verbose=1)

# Train the model for 1,500 steps
model.learn(total_timesteps=1500)

# Evaluate the model's performance
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)

print(f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")

# Attach the video recorder only now, after training and evaluation. The
# trigger fires at the recorder's step 0, so wrapping earlier would capture
# the first training steps of the untrained agent instead of the playback.
# The video is sized to hold every playback episode at its maximum length.
env = VecVideoRecorder(
    env,
    ".",
    record_video_trigger=lambda step: step == 0,
    video_length=n_episodes * episode_duration,
    name_prefix="mario_video",
)

# Play the game with the trained model and record the gameplay as a video file
try:
    obs = env.reset()
    for i in range(n_episodes):
        episode_reward = 0.0
        finished = False
        for _step in range(episode_duration):
            action, _state = model.predict(obs)
            obs, rewards, dones, _infos = env.step(action)
            # The vectorized env returns one entry per sub-environment; there
            # is exactly one here, so take index 0 to work with plain scalars.
            episode_reward += float(rewards[0])
            if dones[0]:
                # The VecEnv has already reset itself; `obs` is the first
                # observation of the next episode.
                finished = True
                break
        print(f"Episode {i+1}: reward={episode_reward}")
        if not finished and i + 1 < n_episodes:
            # The step cap was reached before the game ended: start the next
            # episode from a fresh state.
            # NOTE(review): depending on the installed SB3 version, reset()
            # on the recorder may begin a new video file -- confirm.
            obs = env.reset()
finally:
    # Always close the environment so the video file is finalized even if
    # playback raises.
    env.close()
