#### Tests solving Gymnasium environments with the NEAT algorithm.

In [None]:
import gymnasium as gym
import autograd.numpy as np
import sys
import os

# Make the local packages importable. Paths are resolved from the kernel's
# current working directory, so this assumes the kernel was started in the
# folder that contains this notebook.
notebook_dir = os.path.abspath(os.curdir)
gymnasium_dir = notebook_dir  # Current gymnasium directory
root_dir = os.path.abspath(os.path.join(notebook_dir, os.pardir, os.pardir))
src_dir = os.path.join(root_dir, 'src')  # where the 'evograd' package is located

# Each entry is pushed to the front, so gymnasium_dir ends up ahead of src_dir.
for search_dir in (src_dir, gymnasium_dir):
    sys.path.insert(0, search_dir)

from evograd.run    import Config
from trial_cartpole import Trial_CartPole

The fitness of an Individual has two components:

+ the total reward returned by the environment - this
  is the number of time steps the pole stayed vertical
+ a penalty proportional to the absolute distance from
  the center at the end of the episode

The magnitude of this penalty is controlled by the `position_penalty_coeff` factor.

This is a configuration parameter which is not specified in the generic "config_gymnasium.ini" configuration file; it must be set below, when we load configuration data.

In [None]:
# LOAD CONFIGURATION DATA
# Read the generic settings file, then apply the values chosen for this notebook.
config = Config("config_gymnasium.ini")

cartpole_overrides = {
    "position_penalty_coeff": 100,   # penalty factor described above (typical value 0-1000)
    "num_episodes_average":   1000,
    "fitness_threshold":      495,   # Adjust this to fit the environment!
}
for option_name, option_value in cartpole_overrides.items():
    setattr(config, option_name, option_value)

In [None]:
# Build the CartPole trial from the configuration loaded above and run it.
# num_jobs=-1 runs individual fitness evaluation in parallel
# (presumably "use all available workers" -- confirm against Trial.run).
NUM_JOBS = -1

trial = Trial_CartPole(config)
trial.run(num_jobs=NUM_JOBS)

In [None]:
# Replay a single episode in a rendering window, driven by the individual
# returned by get_fittest_individual() after the run above.

# Look up and prune the individual first: if either call fails, no render
# window has been opened yet and nothing needs cleaning up.
best_individual = trial._population.get_fittest_individual()
best_individual_pruned = best_individual.prune()

# Re-create the trial's environment by id, this time with on-screen rendering.
env_name = trial.env.spec.id
env = gym.make(env_name, render_mode='human')
total_reward = 0
done = False

try:
    observation, info = env.reset()
    print("Rendering episode... (close the window to continue)")

    while not done:
        # Get action from network: the index of the largest output is the action
        network_output = best_individual_pruned._network.forward_pass(observation.tolist())
        action = np.argmax(network_output)

        # Take step
        observation, reward, terminated, truncated, info = env.step(action)
        # The episode ends on failure (terminated) or on the step limit (truncated)
        done = terminated or truncated
        total_reward += reward
finally:
    # Always release the render window, even if the episode loop raised.
    env.close()

print(f"Episode finished with reward: {total_reward}")