### Libraries

In [None]:
import gymnasium as gym
import highway_env
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
import matplotlib.pyplot as plt

### Customized configuration for the 'parking-v0' environment

In [None]:
# Settings for highway-env's "parking-v0" environment.
config = {
    "observation": {
        # Goal-conditioned observation: a dict with "observation",
        # "achieved_goal" and "desired_goal" entries. A dict observation is
        # what PPO's "MultiInputPolicy" (used for training) requires; the
        # flat "Kinematics" array is incompatible with that policy.
        "type": "KinematicsGoal",
        "vehicles_count": 1,
        "features": ["x", "y", "vx", "vy", "cos_h", "sin_h"],
        # One divisor per feature, mandatory for "KinematicsGoal".
        # These are the values parking-v0 uses by default.
        "scales": [100, 100, 5, 5, 1, 1],
        "absolute": True,
    },
    "action": {
        "type": "ContinuousAction",
    },
    "simulation_frequency": 15,
    "policy_frequency": 5,
    "screen_width": 600,
    "screen_height": 600,
    "centering_position": [0.5, 0.5],
    "scaling": 5.5,
    "render_agent": True,
    "vehicles_count": 1,
    "duration": 100,
    "offscreen_rendering": False,
}


def make_parking_env():
    """Return a new "parking-v0" environment built from ``config``.

    The configuration is handed to ``gym.make`` so the observation and
    action spaces are defined from it at construction time. Calling
    ``configure()`` on an already-built environment only takes effect on
    the next ``reset()``, which leaves the advertised spaces out of sync
    with the observations actually returned.
    """
    return gym.make("parking-v0", render_mode="rgb_array", config=config)


# Environment used for evaluation and for the rendered episode.
env = make_parking_env()

# Vectorized environment used for training. It gets its own instance so that
# training and evaluation do not share simulator state.
vec_env = make_vec_env(make_parking_env, n_envs=1)

### Training with PPO

We could not fix the problem in the cell below. As a result, we could not verify that the subsequent code runs; it was written based on other examples.

In [None]:
# Separate two-layer (64, 64) networks for the policy (pi) and the value
# function (vf). The dict is passed directly: the list-wrapped form
# ``[dict(pi=..., vf=...)]`` has been deprecated since SB3 v1.8, when shared
# layers were removed from the MLP extractor.
policy_kwargs = dict(
    net_arch=dict(pi=[64, 64], vf=[64, 64]),
)

# "MultiInputPolicy" is the SB3 policy for dict observation spaces.
model = PPO(
    policy="MultiInputPolicy",
    env=vec_env,
    verbose=1,
    tensorboard_log="./ppo_parking_tensorboard/",
    policy_kwargs=policy_kwargs,
)

# Train for 100k environment steps; progress is logged to TensorBoard.
model.learn(total_timesteps=100_000)

### Evaluation of the trained agent

In [None]:
# Run the trained policy for 10 full episodes and report the mean and
# standard deviation of the per-episode return.
mean_reward, std_reward = evaluate_policy(
    model=model,
    env=env,
    n_eval_episodes=10,
)
summary = f"Average reward: {mean_reward:.2f} +/- {std_reward:.2f}"
print(summary)

### Running an episode with rendering

In [None]:
# Gymnasium's reset() returns (observation, info); only the observation is
# needed to start the rollout.
obs = env.reset()[0]
done = False
total_reward = 0

while not done:
    # Deterministic actions: use the policy's mode rather than sampling.
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    # Gymnasium reports the end of an episode through two flags: `terminated`
    # (the task itself ended) and `truncated` (e.g. the time limit was hit).
    # The episode is over when either one is set; checking only `terminated`
    # keeps stepping an environment that has already timed out.
    done = terminated or truncated
    # NOTE(review): with render_mode="rgb_array", render() returns the frame
    # instead of opening a window; the frame is discarded here. Keep it if the
    # episode should be displayed or saved.
    env.render()
    total_reward += reward

print(f"Total reward for the rendered episode: {total_reward:.2f}")

# Closing the environment
env.close()