---
output-file: individual.html
title: DHPCTIndividual

---

In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
#| default_exp individual

In [3]:
# from nbdev import *
# default_exp individual

In [4]:
#hide
import sys
sys.path.append("..")
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Lambda, Multiply
import gym

# DHPCTIndividual

> Class for implementing Deep Perceptual Control Theory individuals.

In [5]:
#export
class DHPCTIndividual:
    """DHPCTIndividual encapsulates an environment and a keras model representing a PCT hierarchy.

    An individual is described by a plain configuration (environment name,
    environment keyword arguments and per-level specifications). The Gym
    environment and the Keras model are only created when `compile()` is
    called, either explicitly or lazily by `run()`.
    """

    def __init__(self, env_name, env_props=None, levels=None, activation_funcs=None, weight_types=None):
        """
        Initialize a new individual with environment and hierarchy specifications.

        Parameters:
        - env_name: Name of the OpenAI Gym environment to use (e.g. 'CartPole-v1')
        - env_props: Dictionary of keyword arguments forwarded to `gym.make`
        - levels: List of dictionaries specifying each level in the hierarchy;
          `compile()` reads the 'units' key of each entry
        - activation_funcs: List of activation functions, indexed by level;
          levels without an entry fall back to 'linear'
        - weight_types: List of weight initialization methods for each level
          (stored and returned by `config()`; not consumed by `compile()`)
        """
        self.env_name = env_name
        self.env_props = env_props or {}
        self.levels = levels or []
        self.activation_funcs = activation_funcs or []
        self.weight_types = weight_types or []
        # Both are created by compile().
        self.env = None
        self.model = None

    @classmethod
    def from_config(cls, config):
        """
        Create an individual from a configuration dictionary.

        Parameters:
        - config: Dictionary containing individual configuration; 'env_name'
          is required, every other key is optional

        Returns:
        - DHPCTIndividual instance (not yet compiled)
        """
        return cls(
            env_name=config['env_name'],
            env_props=config.get('env_props', {}),
            levels=config.get('levels', []),
            activation_funcs=config.get('activation_funcs', []),
            weight_types=config.get('weight_types', [])
        )

    @staticmethod
    def _action_dim(action_space):
        """
        Return the number of output units needed to drive `action_space`.

        Spaces with a non-empty `shape` use its first dimension. Spaces whose
        shape is empty or missing (e.g. a discrete space, whose shape is `()`)
        use their `n` attribute instead.
        """
        shape = getattr(action_space, 'shape', None)
        if shape:
            return int(shape[0])
        return int(action_space.n)

    @staticmethod
    def _to_env_action(action_space, raw_action):
        """
        Convert the model's raw output vector into an action for the environment.

        For a discrete space (has `n` and an empty shape) the index of the
        largest output is returned as an int; otherwise the raw vector is
        passed through unchanged.
        """
        shape = getattr(action_space, 'shape', None)
        if hasattr(action_space, 'n') and not shape:
            return int(np.argmax(raw_action))
        return raw_action

    def compile(self):
        """
        Build the environment and Keras model based on specifications.

        Each level adds a perception layer, an error (reference - perception)
        layer and an output layer. Level 0 uses the raw environment input as
        its reference, so its 'units' must be compatible with the observation
        size for the subtraction to work.

        Returns:
        - Self (for method chaining)
        """
        # Initialize the environment
        self.env = gym.make(self.env_name, **self.env_props)

        # Get environment dimensions
        state_dim = self.env.observation_space.shape[0]
        action_dim = self._action_dim(self.env.action_space)

        # Build Keras model
        inputs = Input(shape=(state_dim,), name='env_input')
        x = inputs

        # Build hierarchy levels
        for i, level_spec in enumerate(self.levels):
            units = level_spec['units']
            # Levels beyond the end of activation_funcs default to 'linear'.
            activation = self.activation_funcs[i] if i < len(self.activation_funcs) else 'linear'

            # Reference signal (desired perceptual signal)
            if i == 0:
                # First level uses external input as reference
                reference = x
            else:
                # Later levels derive their reference from the previous level's output
                reference = Dense(units, name=f'reference_{i}', activation=activation)(x)

            # Perception (current perceptual signal)
            perception = Dense(units, name=f'perception_{i}', activation=activation)(x)

            # Error (difference between reference and perception)
            error = Lambda(lambda pair: pair[0] - pair[1], name=f'error_{i}')([reference, perception])

            # Output (action to reduce error); feeds the next level
            x = Dense(units, name=f'output_{i}', activation=activation)(error)

        # Final layer (outputs action to environment)
        outputs = Dense(action_dim, name='action', activation='tanh')(x)

        # Create model
        self.model = Model(inputs=inputs, outputs=outputs)
        self.model.compile(optimizer='adam', loss='mse')

        return self

    def config(self):
        """
        Return a dictionary of individual's properties.

        The containers are deep-copied so that mutating the returned
        dictionary (or an individual built from it) does not affect this one.

        Returns:
        - Dictionary containing configuration
        """
        import copy

        return {
            'env_name': self.env_name,
            'env_props': copy.deepcopy(self.env_props),
            'levels': copy.deepcopy(self.levels),
            'activation_funcs': copy.deepcopy(self.activation_funcs),
            'weight_types': copy.deepcopy(self.weight_types)
        }

    def save_config(self, filepath):
        """
        Save the configuration to a JSON file.

        Parameters:
        - filepath: Path to save the configuration file

        Returns:
        - True if successful, False otherwise
        """
        try:
            # Serialize before opening the file: opening with 'w' truncates it,
            # so a config that cannot be serialized must fail first to avoid
            # destroying an existing file.
            payload = json.dumps(self.config(), indent=2)
            with open(filepath, 'w') as f:
                f.write(payload)
            return True
        except Exception as e:
            print(f"Error saving configuration: {str(e)}")
            return False

    def run(self, steps=1000, train=False, early_termination=True):
        """
        Run the individual in its environment for a number of steps.

        Compiles the individual first if the environment or model is missing.

        Parameters:
        - steps: Maximum number of steps to run
        - train: Whether to update weights during run (currently a no-op)
        - early_termination: Whether to stop if environment signals done

        Returns:
        - Total reward accumulated during the run
        """
        if self.env is None or self.model is None:
            self.compile()

        state, _ = self.env.reset()
        total_reward = 0

        for _ in range(steps):
            # Get action from model; verbose=0 avoids a progress bar per step
            observation = np.asarray(state).reshape(1, -1)
            raw_action = self.model.predict(observation, verbose=0)[0]
            action = self._to_env_action(self.env.action_space, raw_action)

            # Take action in environment
            next_state, reward, done, truncated, _info = self.env.step(action)
            total_reward += reward

            # Update model if training enabled
            if train:
                # Implement online learning logic here
                pass

            state = next_state

            if (done or truncated) and early_termination:
                break

        return total_reward

    def mate(self, other):
        """
        Create two new individuals by crossing this one with another.

        Crossover is not implemented yet; this placeholder returns a pair of
        None values.

        Parameters:
        - other: Another DHPCTIndividual to mate with

        Returns:
        - Two new DHPCTIndividual instances (currently `(None, None)`)
        """
        # Implement crossover logic here
        return None, None

    def mutate(self, struct_prob=0.1, weight_prob=0.1):
        """
        Mutate this individual's structure and/or weights.

        Mutation is not implemented yet; this placeholder leaves the
        individual unchanged.

        Parameters:
        - struct_prob: Probability of structure mutation
        - weight_prob: Probability of weight mutation

        Returns:
        - Self (for method chaining)
        """
        # Implement mutation logic here
        return self

    def evaluate(self, nevals=1):
        """
        Evaluate this individual's fitness by running it multiple times.

        Parameters:
        - nevals: Number of evaluation runs (must be at least 1)

        Returns:
        - Average fitness score

        Raises:
        - ValueError: if nevals is less than 1
        """
        if nevals < 1:
            raise ValueError(f"nevals must be at least 1, got {nevals}")
        scores = [self.run() for _ in range(nevals)]
        return sum(scores) / len(scores)