### How to create the environment (replace neat_test4 with your env name):

- conda create -n neat_test4 python=3.10 gym ipykernel pyglet
- conda activate neat_test4
- pip install neat-python
- pip install -e "<path_to_gym_location>\gym-sokoban"
- python -m ipykernel install --user --name neat_test4 --display-name "Python (neat_test4)"
- pip install graphviz

### Initial setups

In [1]:
import neat
import gym
import gym_sokoban
import pyglet
from pyglet import clock
import numpy as np
import pickle
import time
import logging
from neat.reporting import StdOutReporter
import random
import visualize
import graphviz
import os


In [2]:


## Custom rendering setup if gym's rendering is not available
class Viewer:
    def __init__(self, width, height):
        self.window = pyglet.window.Window(width, height)
        self.image = None
        self.window.on_draw = self.on_draw

    def render(self, image):
        self.image = pyglet.image.ImageData(image.shape[1], image.shape[0], 'RGB', image.tobytes(), pitch=image.shape[1] * -3)
        self.window.dispatch_event('on_draw')

    def on_draw(self):
        if self.image:
            self.window.clear()
            self.image.blit(0, 0)

In [3]:
# Custom reporter class
class CustomReporter(StdOutReporter):
    def __init__(self, show_species_detail, config_filename):
        super().__init__(show_species_detail)
        self.start_time = time.time()
        self.config_filename = config_filename
    
    def end(self):
        runtime = time.time() - self.start_time
        logging.info(f'Total runtime: {runtime:.2f} seconds')
    
    def post_evaluate(self, config, population, species_set, best_genome):
        super().post_evaluate(config, population, species_set, best_genome)
        
        # Log population's average fitness
        total_fitness = sum(genome.fitness for genome in population.values())
        avg_fitness = total_fitness / len(population)
        logging.info(f'Population\'s average fitness: {avg_fitness}')
        
        # Log adjusted fitness score
        adjusted_fitness = []
        for species_id, species in species_set.species.items():
            for genome_id in species.members:
                genome = population[genome_id]
                adjusted_fitness.append(genome.fitness / len(species.members))
        avg_adjusted_fitness = sum(adjusted_fitness) / len(adjusted_fitness)
        logging.info(f'Population\'s average adjusted fitness: {avg_adjusted_fitness}')
        
        # Log best genome information
        logging.info(f'\nBest genome:\nKey: {best_genome.key}\nFitness: {best_genome.fitness}')
        logging.info(f'Nodes:')
        for node_key, node in best_genome.nodes.items():
            logging.info(f'\t{node_key} {node}')
        logging.info(f'Connections:')
        for conn_key, conn in best_genome.connections.items():
            logging.info(f'\t{conn_key} {conn}')
        
        # Log configuration file content
        if self.config_filename:
            try:
                with open(self.config_filename, 'r') as f:
                    config_content = f.read()
                    logging.info(f'Config File:\n{config_content}')
            except FileNotFoundError:
                logging.warning(f'Config file "{self.config_filename}" not found.')

        # Log timestamp
        logging.info(f'Timestamp: {time.strftime("%Y-%m-%d %H:%M:%S")}')


In [4]:
# Initialize logging
logging.basicConfig(filename='neat_log.txt', level=logging.INFO, format='%(message)s')

#### Configs

In [5]:
# Load configuration.
config_filename = 'config-feedforward_v06'
config = neat.Config(neat.DefaultGenome, 
                     neat.DefaultReproduction, 
                     neat.DefaultSpeciesSet, 
                     neat.DefaultStagnation, 
                     config_filename)

# Check if a checkpoint exists
checkpoint_file = r'D:\Education\AI\Machine_Learning_Practice\Summer School 2024\Sokoban-SS2024\NEAT\run_config_06\second_run\neat-checkpoint-v06-49'  # Replace with your checkpoint filename

if os.path.isfile(checkpoint_file):
    # Load the checkpoint
    p = neat.Checkpointer.restore_checkpoint(checkpoint_file)
else:
    # Create the population if no checkpoint exists
    p = neat.Population(config)
    
    

# Add reporters to show progress in the terminal and log to file.
p.add_reporter(neat.StdOutReporter(True))
custom_reporter = CustomReporter(True, config_filename)
p.add_reporter(custom_reporter)
stats = neat.StatisticsReporter()
p.add_reporter(stats)
p.add_reporter(neat.Checkpointer(1, filename_prefix='neat-checkpoint-v06-'))

file_name = 'winner_test_06.pkl'


# Define episode and timestep parameters
num_episodes = 1
timesteps_per_episode = 40

current_episode = 0
current_timestep = 0

min_reward = -10

# param used to mutate a step
epsilon = 0.10

### Classes and methods

#### Preprocessing inputs/outputs

In [6]:
def process_observation(environment, obs):
        
    # Convert the observation to RGB frame or custom observation
    arr_walls, arr_goals, arr_boxes, arr_player = environment.render(mode='raw')

    # Initialize the combined array with walls (1s)
    combined = np.ones_like(arr_walls)
    
    # Set empty fields (0s)
    combined[arr_walls == 0] = 0
    
    # Set targets (3s)
    combined[arr_goals == 1] = 3
    
    # Set boxes (2s)
    combined[arr_boxes == 1] = 2
    
    # Set boxes on targets (4s)
    combined[(arr_boxes == 1) & (arr_goals == 1)] = 4
    
    # Set player position (5s)
    combined[arr_player == 1] = 5

    # Flatten the array
    flat_array = combined.flatten()
    
#     print("Flat array: ", flat_array)
#     print("Flat array shape: ", flat_array.shape)

    # Output the flattened array
    return flat_array



def process_state(state):
# Processes the initial state of env.reset()


    # Initialize the combined array with walls (0s)
    combined = np.ones_like(state[0])
    
    # Set empty fields (1s)
    combined[state[0] == 0] = 0

    # Set targets (3s)
    combined[state[1] == 1] = 3

    # Set boxes (2s)
    combined[state[2] == 1] = 2

    # Set boxes on targets (4s)
    combined[(state[2] == 1) & (state[1] == 1)] = 4

    # Set player position (5s)
    combined[state[3] == 1] = 5

    # Flatten the array
    flat_array = combined.flatten()
    
#     print("Flat array: ", flat_array)
#     print("Flat array shape: ", flat_array.shape)

    # Output the flattened array
    return flat_array

In [7]:
def map_action(output):
    return np.argmax(output)

### Run Neat logic

In [None]:
# Start the game
env = gym.make('Sokoban-small-v1')
# generate the level in the initial stage (env.reset) 
env.reset()


# # OPTIONAL
viewer = Viewer(160, 160)  # Adjust the size according to your environment
ACTION_LOOKUP = env.unwrapped.get_action_lookup()



def eval_genomes(genomes, config):

    global num_episodes, timesteps_per_episode, current_episode, current_timestep, min_reward

    # FOR EACH GENOME
    for genome_id, genome in genomes:
               
        # generate the neural network based on the config provided
        net = neat.nn.FeedForwardNetwork.create(genome, config)
    
        # define the initial fitness of the genome
        genome.fitness = 0.0
    
        # define episodes rewards (list) idea is to keep the fitness scores of all episodes and take the max
        episodes_rewards = []
        
        # FOR EACH EPISODE
        for episode in range(num_episodes):
            
            # Episode reward = 0  
            ep_reward = 0
            
            # reset the game state to the initial phase
            initial_state = env.reset()

            # map inputs suitable for the Neural Network (initial state of the game as flatten array, e.g. with length 49, created by the initial 7x7 grid)
            # used as an input layer in the Neural Network
            initial_inputs = process_state(initial_state)
            
            game_state_after_step = initial_inputs
            
            # FOR EACH STEP
            for step in range(timesteps_per_episode):
                
               
                # calculate probabilities for each output to be selected
                action_prob = net.activate(game_state_after_step)
                
                # select the action, based on the output's probabilities of being selected
                action = map_action(action_prob)
                
                # ADD CUSTOM MUTATION IN THE STEPS (at a random event, the alg could chose a random step)
                
                if random.random() < epsilon:
                    action = env.action_space.sample()
                else:
                    # select the action, based on the output's probabilities of being selected
                    action = map_action(action_prob)
                
#                 # RANDOM ACTION (replace with the genome)
#                 action = env.action_space.sample()

                # make the move in the game and output game state + info + reward
                observation, reward, done, info = env.step(action)
                
#                 # TODO (MAYBE) !!!! Adjust the fitness function
                
#                 # 1) USE THE info to extract information about the move and change the reward accordingly
                
                # penalizes the passiveness of the player; rewards movement of boxes more
                if not info["action.moved_player"]:
                    reward = -0.5                
                elif info["action.moved_box"]:
                    reward = 0.2
#                 elif ....
                
                game_state_after_step = process_observation(environment=env, obs=observation)

                # IMAGE STUFF
                image = env.render(mode='rgb_array')
                viewer.render(image)

#                 # PRINT INFO
                logging.info(f'Population\'s steps info: {(ACTION_LOOKUP[action], reward, done, info)}')

                # POPULATE THE Episode reward +=
                # if the game didn't end on this step
                if not done:
                    ep_reward += reward
                    current_timestep += 1
                    
                # if the game ended
                else:
#                     current_timestep = 0
#                     current_episode += 1
                    print(f"I BEAT THE STUPID GAME!!! Happend on step {current_timestep}")
                    break
    
                    # OR BREAK

            # add the accumulated rewards for this episode into a list (episodes_rewards)   
            episodes_rewards.append(ep_reward)

        # choose the best performance on an episode for the genome
        genome.fitness = max(episodes_rewards)

#         print("All episodes finished. Closing window.")
        viewer.window.close()  # Close the Pyglet window explicitly
        


# Run until a solution is found. The number indicates the max number of generations to be produced
winner = p.run(eval_genomes, 50)

# Display the winning genome.
print('\nBest genome:\n{!s}'.format(winner))

# SAVE THE WINNER GENOME
with open("winner.pkl", "wb") as f:
    pickle.dump(winner, f)

# Show output of the most fit genome against training data.
print('\nOutput:')
winner_net = neat.nn.FeedForwardNetwork.create(winner, config)

# Save the winner
with open(file_name, 'wb') as f:
    pickle.dump(winner, f)


  logger.warn(
  logger.warn(
  logger.warn(



 ****** Running generation 49 ****** 


 ****** Running generation 49 ****** 



  logger.deprecation(
  logger.warn(


Population's average fitness: -16.40952 stdev: 4.13746
Best fitness: -3.40000 - size: (33, 403) - species 56 - id 21666
Population's average fitness: -16.40952 stdev: 4.13746
Best fitness: -3.40000 - size: (33, 403) - species 56 - id 21666
Average adjusted fitness: 0.214
Average adjusted fitness: 0.214
Mean genetic distance 3.098, standard deviation 0.309
Mean genetic distance 3.098, standard deviation 0.309
Population of 504 members in 43 species:
   ID   age  size  fitness  adj fit  stag
     4   46    18     -3.4    0.413    14
    18   43     9     -8.5    0.154    16
    20   43    14     -5.7    0.176    19
    37   34    11     -5.8    0.224     2
    38   32    17     -4.2    0.386     9
    40   32    17     -3.5    0.323    21
    41   31     9    -12.6    0.147     0
    42   31     4     -7.7    0.114    10
    43   31     8    -13.0    0.108    15
    44   30    14     -3.4    0.278    19
    47   26    11     -9.1    0.311    18
    49   25     8    -14.5    0.135    12
 


 ****** Running generation 51 ****** 


 ****** Running generation 51 ****** 

Population's average fitness: -15.41190 stdev: 4.91562
Best fitness: -3.40000 - size: (32, 388) - species 72 - id 731
Population's average fitness: -15.41190 stdev: 4.91562
Best fitness: -3.40000 - size: (32, 388) - species 72 - id 731

Species 44 with 11 members is stagnated: removing it

Species 44 with 11 members is stagnated: removing it

Species 47 with 12 members is stagnated: removing it

Species 47 with 12 members is stagnated: removing it

Species 20 with 15 members is stagnated: removing it

Species 20 with 15 members is stagnated: removing it
Average adjusted fitness: 0.247
Average adjusted fitness: 0.247
Mean genetic distance 3.116, standard deviation 0.334
Mean genetic distance 3.116, standard deviation 0.334
Population of 491 members in 41 species:
   ID   age  size  fitness  adj fit  stag
     4   48    14     -4.2    0.233    16
    18   45     7    -16.1    0.114    18
    37   36    11    


 ****** Running generation 53 ****** 


 ****** Running generation 53 ****** 

Population's average fitness: -14.59698 stdev: 5.29324
Best fitness: -3.40000 - size: (16, 388) - species 37 - id 21068
Population's average fitness: -14.59698 stdev: 5.29324
Best fitness: -3.40000 - size: (16, 388) - species 37 - id 21068

Species 53 with 13 members is stagnated: removing it

Species 53 with 13 members is stagnated: removing it

Species 52 with 10 members is stagnated: removing it

Species 52 with 10 members is stagnated: removing it

Species 18 with 8 members is stagnated: removing it

Species 18 with 8 members is stagnated: removing it
Average adjusted fitness: 0.293
Average adjusted fitness: 0.293
Mean genetic distance 3.131, standard deviation 0.341
Mean genetic distance 3.131, standard deviation 0.341
Population of 492 members in 40 species:
   ID   age  size  fitness  adj fit  stag
     4   50    10     -6.9    0.159    18
    37   38    10     -3.4    0.266     0
    38   36    12  


 ****** Running generation 55 ****** 


 ****** Running generation 55 ****** 

Population's average fitness: -14.29736 stdev: 5.39985
Best fitness: -3.40000 - size: (35, 390) - species 75 - id 1313
Population's average fitness: -14.29736 stdev: 5.39985
Best fitness: -3.40000 - size: (35, 390) - species 75 - id 1313

Species 4 with 7 members is stagnated: removing it

Species 4 with 7 members is stagnated: removing it

Species 50 with 11 members is stagnated: removing it

Species 50 with 11 members is stagnated: removing it

Species 55 with 22 members is stagnated: removing it

Species 55 with 22 members is stagnated: removing it

Species 40 with 20 members is stagnated: removing it

Species 40 with 20 members is stagnated: removing it
Average adjusted fitness: 0.314
Average adjusted fitness: 0.314
Mean genetic distance 3.155, standard deviation 0.337
Mean genetic distance 3.155, standard deviation 0.337
Population of 500 members in 36 species:
   ID   age  size  fitness  adj fit  stag


 ****** Running generation 57 ****** 


 ****** Running generation 57 ****** 

Population's average fitness: -14.52097 stdev: 5.28000
Best fitness: -3.40000 - size: (34, 389) - species 72 - id 2807
Population's average fitness: -14.52097 stdev: 5.28000
Best fitness: -3.40000 - size: (34, 389) - species 72 - id 2807

Species 49 with 5 members is stagnated: removing it

Species 49 with 5 members is stagnated: removing it
Average adjusted fitness: 0.319
Average adjusted fitness: 0.319
Mean genetic distance 3.138, standard deviation 0.333
Mean genetic distance 3.138, standard deviation 0.333
Population of 494 members in 36 species:
   ID   age  size  fitness  adj fit  stag
    37   42    10    -11.4    0.218     4
    38   40    19     -3.8    0.438    17
    41   39     7    -14.9    0.172     8
    42   39     3    -17.7    0.077    18
    51   32    15     -4.0    0.406    16
    54   26    16     -3.4    0.460    20
    56   21    16     -3.7    0.361    16
    57   20    17     -3.8 

Population's average fitness: -14.14431 stdev: 5.62689
Best fitness: -3.10000 - size: (33, 388) - species 67 - id 3867
Population's average fitness: -14.14431 stdev: 5.62689
Best fitness: -3.10000 - size: (33, 388) - species 67 - id 3867

Species 42 with 5 members is stagnated: removing it

Species 42 with 5 members is stagnated: removing it
Average adjusted fitness: 0.329
Average adjusted fitness: 0.329
Mean genetic distance 3.134, standard deviation 0.326
Mean genetic distance 3.134, standard deviation 0.326
Population of 494 members in 38 species:
   ID   age  size  fitness  adj fit  stag
    37   44     9     -6.2    0.166     6
    38   42    13     -7.4    0.290    19
    41   41     5    -17.0    0.120    10
    51   34    14     -4.1    0.355    18
    56   23    16     -3.4    0.450    18
    57   22     9     -3.7    0.290    20
    58   21    12     -4.8    0.327    16
    59   21    14     -3.5    0.357    16
    60   19    12     -5.2    0.198    16
    61   19    14     -


 ****** Running generation 61 ****** 


 ****** Running generation 61 ****** 

Population's average fitness: -13.77510 stdev: 5.65892
Best fitness: -2.80000 - size: (28, 379) - species 59 - id 2509
Population's average fitness: -13.77510 stdev: 5.65892
Best fitness: -2.80000 - size: (28, 379) - species 59 - id 2509

Species 51 with 13 members is stagnated: removing it

Species 51 with 13 members is stagnated: removing it
Average adjusted fitness: 0.356
Average adjusted fitness: 0.356
Mean genetic distance 3.136, standard deviation 0.332
Mean genetic distance 3.136, standard deviation 0.332
Population of 489 members in 39 species:
   ID   age  size  fitness  adj fit  stag
    37   46    12     -3.4    0.269     8
    41   43     5    -16.2    0.128    12
    56   25    18     -3.4    0.378    20
    58   23    16     -4.0    0.523    18
    59   23    14     -2.8    0.429     0
    60   21     9     -5.2    0.178    18
    61   21    12     -4.4    0.305    17
    62   20     6     -4.


 ****** Running generation 63 ****** 


 ****** Running generation 63 ****** 

Population's average fitness: -13.91701 stdev: 5.69731
Best fitness: -3.20000 - size: (28, 379) - species 59 - id 2509
Population's average fitness: -13.91701 stdev: 5.69731
Best fitness: -3.20000 - size: (28, 379) - species 59 - id 2509

Species 58 with 15 members is stagnated: removing it

Species 58 with 15 members is stagnated: removing it

Species 60 with 11 members is stagnated: removing it

Species 60 with 11 members is stagnated: removing it
Average adjusted fitness: 0.362
Average adjusted fitness: 0.362
Mean genetic distance 3.125, standard deviation 0.343
Mean genetic distance 3.125, standard deviation 0.343
Population of 491 members in 38 species:
   ID   age  size  fitness  adj fit  stag
    37   48    10     -3.7    0.278    10
    41   45     5    -16.3    0.150    14
    59   25     9     -3.2    0.203     2
    61   23    14     -4.0    0.369    19
    63   21    20     -3.4    0.557    14
 


 ****** Running generation 65 ****** 


 ****** Running generation 65 ****** 

Population's average fitness: -13.08153 stdev: 5.99562
Best fitness: -3.10000 - size: (37, 369) - species 63 - id 6680
Population's average fitness: -13.08153 stdev: 5.99562
Best fitness: -3.10000 - size: (37, 369) - species 63 - id 6680

Species 66 with 14 members is stagnated: removing it

Species 66 with 14 members is stagnated: removing it
Average adjusted fitness: 0.391
Average adjusted fitness: 0.391
Mean genetic distance 3.135, standard deviation 0.358
Mean genetic distance 3.135, standard deviation 0.358
Population of 501 members in 35 species:
   ID   age  size  fitness  adj fit  stag
    37   50    13     -3.4    0.329    12
    41   47     5    -16.3    0.126    16
    59   27     9     -7.5    0.194     4
    63   23    22     -3.1    0.654    16
    65   22    13     -3.4    0.361    16
    67   21    13     -3.8    0.321     6
    68   21    18     -3.7    0.585    18
    69   20    11     -4.

Mean genetic distance 3.152, standard deviation 0.334
Mean genetic distance 3.152, standard deviation 0.334
Population of 498 members in 38 species:
   ID   age  size  fitness  adj fit  stag
    37   52    13     -4.6    0.336    14
    41   49     7     -5.0    0.221    18
    59   29     7     -4.3    0.206     6
    63   25    19     -3.7    0.429    18
    65   24    13     -4.2    0.347    18
    67   23    14     -3.8    0.346     8
    68   23    18     -3.7    0.508    20
    69   22    10     -4.0    0.290    18
    70   21    11     -5.2    0.275    14
    71   21    15     -3.4    0.435    19
    72   21    16     -4.1    0.370    16
    73   20    12     -3.8    0.302    11
    74   20    13     -5.2    0.272    18
    75   20    16     -3.8    0.551    18
    76   19     9     -4.0    0.263    14
    77   19    11     -3.7    0.178     0
    78   18    12     -4.2    0.260    13
    79   18    15     -5.2    0.305    14
    80   18    14     -3.1    0.393     0
    81   17

Mean genetic distance 3.159, standard deviation 0.343
Mean genetic distance 3.159, standard deviation 0.343
Population of 504 members in 32 species:
   ID   age  size  fitness  adj fit  stag
    37   54    14     -4.2    0.388    16
    59   31    10    -10.0    0.243     8
    63   27    23     -3.4    0.532    20
    67   25    14     -3.4    0.305    10
    70   23    13     -4.8    0.351    16
    72   23    14     -3.4    0.352    18
    73   22    16     -3.4    0.341    13
    76   21    13     -4.4    0.303    16
    77   21    11     -4.5    0.221     2
    78   20    19     -3.8    0.520    15
    79   20    14     -4.8    0.295    16
    80   20    22     -3.9    0.650     2
    81   19    11     -5.2    0.285    11
    82   17    19     -3.7    0.537    11
    83   16    18     -3.8    0.453     6
    84   14    16     -4.2    0.312    13
    85   13    18     -3.7    0.389     5
    86   11    16     -3.8    0.471     7
    87   11    17     -3.7    0.412     0
    88   10


 ****** Running generation 72 ****** 


 ****** Running generation 72 ****** 

Population's average fitness: -12.99941 stdev: 6.04120
Best fitness: -3.40000 - size: (35, 378) - species 80 - id 9369
Population's average fitness: -12.99941 stdev: 6.04120
Best fitness: -3.40000 - size: (35, 378) - species 80 - id 9369
Average adjusted fitness: 0.402
Average adjusted fitness: 0.402
Mean genetic distance 3.169, standard deviation 0.339
Mean genetic distance 3.169, standard deviation 0.339
Population of 504 members in 35 species:
   ID   age  size  fitness  adj fit  stag
    37   57    16     -4.1    0.486    19
    59   34    12     -3.8    0.254    11
    67   28    12     -3.4    0.279    13
    70   26    14     -4.0    0.320    19
    73   25    15     -3.4    0.365    16
    76   24    11     -4.0    0.266    19
    77   24    15     -3.8    0.410     1
    78   23    15     -3.4    0.411    18
    79   23    14     -4.4    0.383    19
    80   23    15     -3.4    0.364     5
    81 

In [None]:
visualize.plot_stats(stats, ylog=False, view=True)
visualize.plot_species(stats, view=True)

In [None]:
visualize.draw_net(config=config, genome=winner)

In [None]:
import neat

# Load configuration.
config_filename = 'config-feedforward_v01'
config = neat.Config(neat.DefaultGenome, 
                     neat.DefaultReproduction, 
                     neat.DefaultSpeciesSet, 
                     neat.DefaultStagnation, 
                     config_filename)

config.genome_config.add_node_mutation_prob = 0.03
config.genome_config.add_connection_mutation_prob = 0.05

# Print the parsed parameters to debug
print("Initial connection type:", config.genome_config.initial_connection)
print("Allowed connectivity options:", config.genome_config.allowed_connectivity)
print("Activation options:", config.genome_config.activation_options)


### ChatGPT

In [None]:
import neat
import gym
import gym_sokoban
import pyglet
import numpy as np

import time
import logging
from neat.reporting import StdOutReporter

# Custom rendering setup if gym's rendering is not available
class Viewer:
    def __init__(self, width, height):
        self.window = pyglet.window.Window(width, height)
        self.image = None
        self.window.on_draw = self.on_draw

    def render(self, image):
        self.image = pyglet.image.ImageData(image.shape[1], image.shape[0], 'RGB', image.tobytes(), pitch=image.shape[1] * -3)
        self.window.dispatch_event('on_draw')

    def on_draw(self):
        if self.image:
            self.window.clear()
            self.image.blit(0, 0)

# Initialize logging
logging.basicConfig(filename='neat_log.txt', level=logging.INFO, format='%(message)s')

# Custom reporter class
class CustomReporter(StdOutReporter):
    def __init__(self, show_species_detail):
        super().__init__(show_species_detail)
        self.start_time = time.time()
    
    def end(self):
        runtime = time.time() - self.start_time
        logging.info(f'Total runtime: {runtime:.2f} seconds')

    def post_evaluate(self, config, population, species_set, best_genome):
        super().post_evaluate(config, population, species_set, best_genome)
        
        # Log population's average fitness
        total_fitness = sum(genome.fitness for genome in population.values())
        avg_fitness = total_fitness / len(population)
        logging.info(f'Population\'s average fitness: {avg_fitness}')
        
        # Log adjusted fitness score
        adjusted_fitness = []
        for species_id, species in species_set.species.items():
            for genome_id in species.members:
                genome = population[genome_id]
                adjusted_fitness.append(genome.fitness / len(species.members))
        avg_adjusted_fitness = sum(adjusted_fitness) / len(adjusted_fitness)
        logging.info(f'Population\'s average adjusted fitness: {avg_adjusted_fitness}')
        
        # Log best genome information
        logging.info(f'\nBest genome:\nKey: {best_genome.key}\nFitness: {best_genome.fitness}')
        logging.info(f'Nodes:')
        for node_key, node in best_genome.nodes.items():
            logging.info(f'\t{node_key} {node}')
        logging.info(f'Connections:')
        for conn_key, conn in best_genome.connections.items():
            logging.info(f'\t{conn_key} {conn}')
        logging.info(f'Timestamp: {time.strftime("%Y-%m-%d %H:%M:%S")}')


def process_observation(environment, obs):
    # Convert the observation to RGB frame or custom observation
    arr_walls, arr_goals, arr_boxes, arr_player = environment.render(mode='raw')

    # Initialize the combined array with walls (1s)
    combined = np.ones_like(arr_walls)
    
    # Set empty fields (0s)
    combined[arr_walls == 0] = 0
    
    # Set targets (3s)
    combined[arr_goals == 1] = 3
    
    # Set boxes (2s)
    combined[arr_boxes == 1] = 2
    
    # Set boxes on targets (4s)
    combined[(arr_boxes == 1) & (arr_goals == 1)] = 4
    
    # Set player position (5s)
    combined[arr_player == 1] = 5

    # Flatten the array
    flat_array = combined.flatten()
    
    return flat_array


def process_state(state):
    # Processes the initial state of env.reset()

    # Initialize the combined array with walls (0s)
    combined = np.ones_like(state[0])
    
    # Set empty fields (1s)
    combined[state[0] == 0] = 0

    # Set targets (3s)
    combined[state[1] == 1] = 3

    # Set boxes (2s)
    combined[state[2] == 1] = 2

    # Set boxes on targets (4s)
    combined[(state[2] == 1) & (state[1] == 1)] = 4

    # Set player position (5s)
    combined[state[3] == 1] = 5

    # Flatten the array
    flat_array = combined.flatten()
    
    return flat_array


# Start the game
env = gym.make('Sokoban-small-v1')
env.reset()

# Optional viewer setup
viewer = Viewer(160, 160)  # Adjust the size according to your environment
ACTION_LOOKUP = env.unwrapped.get_action_lookup()

# Define episode and timestep parameters
num_episodes = 1
timesteps_per_episode = 40

def map_action(action_prob):
    return np.argmax(action_prob)

def eval_genomes(genomes, config):
    global num_episodes, timesteps_per_episode

    # For each genome
    for genome_id, genome in genomes:
        # Generate the neural network based on the config provided
        net = neat.nn.FeedForwardNetwork.create(genome, config)
    
        # Define the initial fitness of the genome
        genome.fitness = 0.0
    
        # Define episodes rewards (list) idea is to keep the fitness scores of all episodes and take the max
        episodes_rewards = []
        
        # For each episode
        for episode in range(num_episodes):
            # Episode reward = 0  
            ep_reward = 0
            
            # Reset the game state to the initial phase
            initial_state = env.reset()

            # Map inputs suitable for the Neural Network (initial state of the game as flatten array, e.g. with length 49, created by the initial 7x7 grid)
            # Used as an input layer in the Neural Network
            initial_inputs = process_state(initial_state)
            
            game_state_after_step = initial_inputs
            
            # For each step
            for step in range(timesteps_per_episode):
                # Calculate probabilities for each output to be selected
                action_prob = net.activate(game_state_after_step)
                
                # Select the action, based on the output's probabilities of being selected
                action = map_action(action_prob)
                
                # Make the move in the game and output game state + info + reward
                observation, reward, done, info = env.step(action)
                
                game_state_after_step = process_observation(environment=env, obs=observation)

                # Image stuff
                image = env.render(mode='rgb_array')
                viewer.render(image)

                # Print info
                print(ACTION_LOOKUP[action], reward, done, info)

                # Populate the episode reward
                if not done:
                    ep_reward += reward
                else:
                    print(f"Game finished in {step+1} steps")
                    break
    
            # Add the accumulated rewards for this episode into a list (episodes_rewards)   
            episodes_rewards.append(ep_reward)

        # Choose the best performance on an episode for the genome
        genome.fitness = max(episodes_rewards)

        print("All episodes finished. Closing window.")
        viewer.window.close()  # Close the Pyglet window explicitly

# Load configuration.
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'config-feedforward')

# Create the population, which is the top-level object for a NEAT run.
p = neat.Population(config)

# Add a stdout reporter to show progress in the terminal.
p.add_reporter(neat.StdOutReporter(True))
p.add_reporter(CustomReporter(True))
p.add_reporter(neat.StatisticsReporter())

# Run until a solution is found. The number indicates the max number of generations to be produced
winner = p.run(eval_genomes, 5)

# Display the winning genome.
print('\nBest genome:\n{!s}'.format(winner))

# Show output of the most fit genome against training data.
print('\nOutput:')
winner_net = neat.nn.FeedForwardNetwork.create(winner, config)

# Start the Pyglet event loop to keep the window open
pyglet.app.run()


### BACKUP

In [None]:
# Start the game

env = gym.make('Sokoban-small-v1')
# generate the level in the initial stage (env.reset) 
env.reset()


# # OPTIONAL

# viewer = Viewer(160, 160)  # Adjust the size according to your environment

# ACTION_LOOKUP = env.unwrapped.get_action_lookup()

# Define episode and timestep parameters
num_episodes = 1
timesteps_per_episode = 40

current_episode = 0
current_timestep = 0

min_reward = -10


def eval_genomes(genomes, config)
# FOR EACH GENOME
    for genome_id, genome in genomes:
        
        env = gym.make()
    # net = neat.nn.FeedForwardNetwork.create(genome, config)
    # DEF INITIAL GENOME FITNESS = 0
    
    
    # EPISODES REWARDS = [] IDEA IS TO KEEP THE FITNESS SCORES OF ALL EPISODES AND THEN TAKE THE MAX
    
    # FOR EACH EPISODE
        # Episode reward = 0  
        # env.reset()
            
        # FOR EACH STEP
            # ACTION - GENERATED BY THE GENOME
            # RANDOM ACTION
            action = env.action_space.sample()
            
            # MAKE THE MOVE IN THE GAME
            # OUTPUT GAME STATE AFTER THE STEP WITH INFO + REWARD            
            observation, reward, done, info = env.step(action)
            
            
            # IMAGE STUFF
            image = env.render(mode='rgb_array')
            viewer.render(image)
            
            # PRINT INFO
            print(ACTION_LOOKUP[action], reward, done, info)

            # POPULATE THE Episode reward +=
            # if not done:
                # reward += MIN REWARD
                # current_timestep += 1

            # if done:
                # current_timestep = 0
                # current_episode += 1

                # OR BREAK

        # EPISODES REWARDS APPEND episode reward   
        
    
    # GENOME.FITNESS = max(EPISODE REWARDS)        
                

        

# # Load configuration.
# config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
#                      neat.DefaultSpeciesSet, neat.DefaultStagnation,
#                      'config-feedforward')

# # Create the population, which is the top-level object for a NEAT run.
# p = neat.Population(config)

# # Add a stdout reporter to show progress in the terminal.
# p.add_reporter(neat.StdOutReporter(False))

# # Run until a solution is found.
# winner = p.run(eval_genomes, 5)

# # Display the winning genome.
# print('\nBest genome:\n{!s}'.format(winner))

# # Show output of the most fit genome against training data.
# print('\nOutput:')
# winner_net = neat.nn.FeedForwardNetwork.create(winner, config)    

### Game logic

In [None]:
import gym
import gym_sokoban
import pyglet
from pyglet import clock
import numpy as np

## Custom rendering setup if gym's rendering is not available
class Viewer:
    def __init__(self, width, height):
        self.window = pyglet.window.Window(width, height)
        self.image = None
        self.window.on_draw = self.on_draw

    def render(self, image):
        self.image = pyglet.image.ImageData(image.shape[1], image.shape[0], 'RGB', image.tobytes(), pitch=image.shape[1] * -3)
        self.window.dispatch_event('on_draw')

    def on_draw(self):
        if self.image:
            self.window.clear()
            self.image.blit(0, 0)

env = gym.make('Sokoban-small-v1')
# generate the level in the initial stage (env.reset) 
env.reset()

print("Room Fixed")
print(env.room_fixed)
print(type(env.room_fixed))
print(env.room_fixed.shape)
print()
print(env.room_state)
print()
print(env.box_mapping)
print()


viewer = Viewer(160, 160)  # Adjust the size according to your environment

ACTION_LOOKUP = env.unwrapped.get_action_lookup()

# Define episode and timestep parameters
num_episodes = 2
timesteps_per_episode = 100

current_episode = 0
current_timestep = 0

def update_environment(dt):
    global current_episode, current_timestep, num_episodes, timesteps_per_episode

    if current_episode < num_episodes:
        if current_timestep < timesteps_per_episode:
            # RANDOM ACTION
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            
            
            
            image = env.render(mode='rgb_array')
            viewer.render(image)

            print(ACTION_LOOKUP[action], reward, done, info)

            if done:
                print(f"Episode finished after {current_timestep + 1} timesteps")
                current_timestep = 0
                current_episode += 1
                env.reset()
            else:
                current_timestep += 1
        else:
            current_episode += 1
            current_timestep = 0
            env.reset()
    else:
        print("All episodes finished. Closing window.")
        viewer.window.close()  # Close the Pyglet window explicitly

# Increase the frequency to match rendering needs (e.g., 60Hz)
clock.schedule_interval(update_environment, 1/60.0)

pyglet.app.run()
