imports

In [1]:
import jax
from jax import numpy as jnp
from flax import nnx
import flax
import evojax

import numpy as np

import sys
sys.path.append("..")

import os

from IPython.display import Image

# Dynamic reload: re-execute models/ctm.py first and bind ``CTM`` afterwards.
# ``importlib.reload`` does not rebind names that were previously pulled in
# with ``from ... import``, so importing CTM *before* the reload would leave
# this notebook holding the stale, pre-reload class.
import importlib

import models.ctm

importlib.reload(models.ctm)
from models.ctm import CTM

<module 'models.ctm' from '/home/kevin/projects/ctm-experiments/experiments/../models/ctm.py'>

Create the model and the parameter flatten/unflatten helpers

In [2]:
def flatten_params(model):
    """Collapse every ``nnx.Param`` of ``model`` into one 1-D vector.

    Returns a pair ``(vector, restore_info)`` where ``restore_info`` is the
    tuple ``(tree_def, param_shapes, split_indices)``:

    * ``tree_def``      -- pytree structure of the parameter state,
    * ``param_shapes``  -- shape of each leaf, in flattening order,
    * ``split_indices`` -- plain Python ints marking where each leaf after the
      first starts inside ``vector`` (suitable for ``jnp.split``).
    """
    leaves, tree_def = jax.tree_util.tree_flatten(nnx.state(model, nnx.Param))

    param_shapes = []
    split_indices = []
    pieces = []
    offset = 0  # running element count; kept as a concrete Python int
    for leaf in leaves:
        if pieces:
            # Every leaf except the first begins at the current offset.
            split_indices.append(offset)
        param_shapes.append(leaf.shape)
        offset += int(np.prod(leaf.shape))
        pieces.append(leaf.flatten())

    return jnp.concatenate(pieces), (tree_def, param_shapes, split_indices)


def unflatten_and_set_params(model, flattened_vector, restore_info):
    """Write a flat parameter vector back into ``model`` in place.

    ``restore_info`` is the ``(tree_def, shapes, split_indices)`` tuple
    produced by ``flatten_params``. The vector is cut at the pre-computed
    (concrete) split points, each piece is reshaped to its recorded shape, the
    parameter tree is rebuilt, and the result is pushed into ``model`` via
    ``nnx.update``. Nothing is returned.
    """
    tree_def, shapes, split_indices = restore_info

    leaves = []
    for chunk, shape in zip(jnp.split(flattened_vector, split_indices), shapes):
        leaves.append(chunk.reshape(shape))

    nnx.update(model, jax.tree_util.tree_unflatten(tree_def, leaves))

# Smoke-test the flattening helper on a small CTM.
config = dict(
    iterations=5,
    d_model=6,
    d_input=12,
    memory_length=5,
    memory_hidden_dims=6,
    heads=1,
    n_synch_out=6,
    n_synch_action=6,
    out_dims=3,
)

ctm = CTM(config, nnx.Rngs(0))

# Reference forward pass on an all-zero input of shape (1, 12).
original_output = ctm(jnp.zeros((1, 12)))
print("Original output:", original_output)

# Collapse all parameters into one vector; keep what is needed to undo it.
flattened_params, restore_info = flatten_params(ctm)
print(f"Flattened parameter vector shape: {flattened_params.shape}")

# Disabled round-trip experiment: perturb the vector and write it back.
# modified_params = flattened_params + 0.1 * jax.random.normal(jax.random.PRNGKey(42), flattened_params.shape)
# unflatten_and_set_params(ctm, modified_params, restore_info)

Original output: [ 0.01526764 -0.06009712 -0.00644524]
Flattened parameter vector shape: (2286,)


In [3]:
from brax import envs
from brax.io import html

from evojax import SimManager
from evojax import ObsNormalizer
from evojax.algo import PGPE
from evojax.policy import MLPPolicy
from evojax.policy.base import PolicyState
from evojax.policy.base import PolicyNetwork
from evojax.task.cartpole import CartPoleSwingUp
from evojax.task.slimevolley import SlimeVolley
from evojax.util import create_logger
from evojax import Trainer

from functools import partial

# Show the devices JAX can see. The value has to be printed explicitly: a bare
# ``jax.devices()`` in the middle of a cell is evaluated and then discarded,
# so only the label would appear in the output.
print('jax.devices():', jax.devices())

# Let's create a directory to save logs and models.
log_dir = '../logs'
logger = create_logger(name='EvoJAX', log_dir=log_dir)
logger.info('Testing CTM')

# Also record the local devices in the log itself.
logger.info('Jax backend: {}'.format(jax.local_devices()))
!nvidia-smi --query-gpu=name --format=csv,noheader

class CTMPolicy(PolicyNetwork):
    """EvoJAX policy that evaluates a CTM from flat parameter vectors.

    A template CTM is built once so that the layout of its parameters
    (``restore_info``) and their total count (``num_params``) are known. The
    solver then supplies flat vectors of that length to ``get_actions``.
    """

    def __init__(self, input_dim, output_dim, rngs=None):
        """Build the template CTM and record its flattened-parameter layout.

        Args:
            input_dim: Used as the CTM's ``d_input``.
            output_dim: Used as the CTM's ``out_dims``.
            rngs: ``nnx.Rngs`` used to initialise the template CTM. When
                omitted a fresh ``nnx.Rngs(0)`` is created per instance. It
                must not be a shared default object: ``Rngs`` is stateful, so
                a default evaluated once at definition time would be advanced
                by every construction and later instances would silently
                initialise differently.
        """
        if rngs is None:
            rngs = nnx.Rngs(0)
        self.ctm = CTM({
            "iterations": 5,
            "d_model": 6,
            "d_input": input_dim,
            "memory_length": 5,
            "memory_hidden_dims": 6,
            "heads": 1,
            "n_synch_out": 6,
            "n_synch_action": 6,
            "out_dims": output_dim,
            }, rngs)
        params, restore_info = flatten_params(self.ctm)
        self.restore_info = restore_info
        self.num_params = params.shape[0]

    # ``self`` (argument 0) is marked static so the policy object itself is
    # not traced by the jit.
    @partial(nnx.jit, static_argnums=(0,))
    def get_actions(self, t_states, params, p_states):
        """Compute one action per (parameter vector, observation) pair.

        Args:
            t_states: Task state; only ``t_states.obs`` is read.
            params: Batch of flat parameter vectors; mapped over its leading
                axis together with ``t_states.obs``.
            p_states: Policy state, passed through untouched.

        Returns:
            ``(actions, p_states)`` where ``actions`` stacks the CTM output of
            every pair along the leading axis.
        """
        def get_action_single(single_params, single_obs):
            # Build a throwaway CTM from the template's config and load the
            # candidate parameters into it, leaving ``self.ctm`` untouched.
            # NOTE(review): presumably done to avoid mutating the shared
            # template while tracing under vmap/jit -- confirm.
            candidate_ctm = CTM(self.ctm.config, rngs=nnx.Rngs(0))
            unflatten_and_set_params(candidate_ctm, single_params, self.restore_info)
            # The CTM is called with a leading axis of size 1 added.
            return candidate_ctm(jnp.expand_dims(single_obs, 0))

        # vmap over parameter vectors
        actions = jax.vmap(get_action_single)(params, t_states.obs)
        return actions, p_states

EvoJAX: 2025-07-29 18:29:15,194 [INFO] Testing CTM
EvoJAX: 2025-07-29 18:29:15,195 [INFO] Jax backend: [CudaDevice(id=0)]


jax.devices():
NVIDIA GeForce RTX 3060 Ti


  pid, fd = os.forkpty()


In [7]:
seed = 42

# One SlimeVolley instance for training and one for evaluation (3000 steps each).
train_task, test_task = (
    SlimeVolley(test=is_test, max_steps=3000) for is_test in (False, True)
)

# The policy is the CTM wrapper defined above, sized from the task's
# observation and action shapes. (An MLPPolicy with hidden_dims=[64, 64] was
# used here previously.)
policy = CTMPolicy(train_task.obs_shape[0], train_task.act_shape[0], nnx.Rngs(0))

print(train_task.obs_shape, train_task.act_shape, policy.num_params, sep="\n")

# PGPE is the evolution algorithm; see
# https://people.idsia.ch/~juergen/nn2010.pdf for details.
# All three step sizes are expressed relative to a single ``lr`` knob.
lr = 1.0
center_learning_rate, stdev_learning_rate, init_stdev = 0.15 * lr, 0.1 * lr, 0.1 * lr

solver = PGPE(
    param_size=policy.num_params,
    pop_size=64,
    seed=seed,
    optimizer='adam',
    # careful: the center learning rate needs to scale with repeats or
    # population size (originally .05)
    center_learning_rate=center_learning_rate,
    stdev_learning_rate=stdev_learning_rate,
    init_stdev=init_stdev,
)

# Score histories filled in by the logging callback: mean score and the
# iteration at which it was recorded, per stage.
train_scores, train_x = [], []
test_scores, test_x = [], []

def log_scores(current_iter, scores, stage):
    """Record the mean of ``scores`` and its iteration for the given stage.

    ``stage == "train"`` appends to ``train_scores`` / ``train_x``; any other
    value appends to ``test_scores`` / ``test_x``.
    """
    is_train = stage == "train"
    score_history = train_scores if is_train else test_scores
    iter_history = train_x if is_train else test_x

    score_history.append(scores.mean())
    iter_history.append(current_iter)

# With policy, solver and tasks in place, wire them into a trainer and run it.
trainer = Trainer(
    # components
    policy=policy,
    solver=solver,
    train_task=train_task,
    test_task=test_task,
    seed=seed,
    # schedule
    max_iter=1000,
    log_interval=20,
    test_interval=200,
    # evaluation budget
    n_repeats=16,  # duplicates
    n_evaluations=128,
    # logging
    log_dir=log_dir,
    logger=logger,
    log_scores_fn=log_scores,
)

# The return value is not needed here; bind it to ``_`` so it is not echoed.
_ = trainer.run()

EvoJAX: 2025-07-29 18:46:09,009 [INFO] use_for_loop=False
EvoJAX: 2025-07-29 18:46:09,023 [INFO] Start to train for 1000 iterations.


(12,)
(3,)
2286


EvoJAX: 2025-07-29 18:46:44,057 [INFO] Iter=20, size=64, max=-29.1875, avg=-30.8125, min=-32.5000, std=0.6693
EvoJAX: 2025-07-29 18:47:14,027 [INFO] Iter=40, size=64, max=-28.2500, avg=-28.7754, min=-31.8125, std=0.4284
EvoJAX: 2025-07-29 18:47:43,913 [INFO] Iter=60, size=64, max=-29.8750, avg=-30.2373, min=-30.6875, std=0.1680
EvoJAX: 2025-07-29 18:48:14,079 [INFO] Iter=80, size=64, max=-28.0625, avg=-30.2539, min=-31.4375, std=0.7106
EvoJAX: 2025-07-29 18:48:43,914 [INFO] Iter=100, size=64, max=-29.1875, avg=-31.4014, min=-37.0000, std=1.3234
EvoJAX: 2025-07-29 18:49:14,069 [INFO] Iter=120, size=64, max=-29.1250, avg=-31.0479, min=-32.9375, std=0.7745
EvoJAX: 2025-07-29 18:49:43,743 [INFO] Iter=140, size=64, max=-28.3125, avg=-30.4473, min=-32.1875, std=0.6823
EvoJAX: 2025-07-29 18:50:13,685 [INFO] Iter=160, size=64, max=-27.9375, avg=-30.6592, min=-32.7500, std=0.9414
EvoJAX: 2025-07-29 18:50:43,618 [INFO] Iter=180, size=64, max=-28.3750, avg=-29.7471, min=-32.0000, std=0.7245
EvoJA

In [10]:
from uniplot import lineplot, lineplot_, scatterplot, scatterplot_
from uniplot.canvas import *

# Plot mean training score against the iteration at which it was logged.
score = lineplot(
    train_x,
    train_scores,
    title="Training Graph",
    xlabel="Training Iteration",
    ylabel="Average Score",
    color="#6699ff",
    name="train",
)
print(score)
# Test-score curve (disabled):
# score2 = lineplot(test_x, test_scores, title="Testing Graph", xlabel="Training Iteration", ylabel="Average Score", color="#ff6666",name="test")
# print(score2)

                      ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀Training Graph⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀ 
                      [38;5;8m┌────────────────────────────────────────┐[0m 
                  [38;5;8m  0[0m [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;111m⢀[0m[38;5;111m⡀[0m⠀⠀[38;5;8m│[0m [38;5;111mtrain[0m
                      [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;111m⢀[0m⠀[38;5;111m⡀[0m[38;5;111m⡔[0m[38;5;111m⠈[0m[38;5;111m⠄[0m[38;5;111m⠁[0m[38;5;111m⠊[0m⠀⠀[38;5;111m⠒[0m⠀[38;5;8m│[0m 
                      [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;111m⢀[0m⠀[38;5;111m⢀[0m[38;5;111m⠄[0m[38;5;111m⠊[0m⠀[38;5;111m⠁[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m 
                      [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;111m⣀[0m[38;5;111m⢀[0m[38;5;111m⠄[0m[38;5;111m⠁[0m[38;5;111m⠈[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m 
                      [38;5;8m│[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;111m⠄[0m[38;5;111m⠊[0m⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀[38;5;8m│[0m 
                      [38;5;8m│

In [11]:
# Let's visualize the learned policy.

def render(task, algo, policy):
    """Roll out ``algo.best_params`` on ``task`` and collect rendered frames.

    A single environment is simulated (batch axis of size 1) until the task
    reports done. The initial state and every 5th step are rendered with
    ``SlimeVolley.render``. The accumulated reward is printed at the end.

    Args:
        task: Task whose ``reset`` and ``step`` drive the rollout.
        algo: Solver exposing ``best_params`` (a flat parameter vector).
        policy: Policy exposing ``reset`` and ``get_actions``.

    Returns:
        List of rendered frames, starting with the initial state.

    Note:
        The rollout key is derived from the notebook-level ``seed`` variable.
    """

    def first_env(batched_state):
        # Drop the leading batch axis by taking entry 0 of every leaf.
        return jax.tree.map(lambda x: x[0], batched_state)

    # Use the task that was passed in, not the notebook-global ``test_task``.
    task_reset_fn = jax.jit(task.reset)
    policy_reset_fn = jax.jit(policy.reset)
    step_fn = jax.jit(task.step)
    act_fn = jax.jit(policy.get_actions)

    # Add a batch axis of size 1 to both the parameters and the PRNG key.
    params = algo.best_params[None, :]
    task_s = task_reset_fn(jax.random.PRNGKey(seed=seed)[None, :])
    policy_s = policy_reset_fn(task_s)

    images = [SlimeVolley.render(first_env(task_s), 0)]
    done = False
    step = 0
    reward = 0
    while not done:
        act, policy_s = act_fn(task_s, params, policy_s)
        task_s, r, d = step_fn(task_s, act)
        step += 1
        reward = reward + r
        done = bool(d[0])
        # Keep every 5th frame only.
        if step % 5 == 0:
            images.append(SlimeVolley.render(first_env(task_s), 0))
    print('reward={}'.format(reward))
    return images


import imageio
from IPython.display import Video

# Roll out the best parameters found so far on the evaluation task.
imgs = render(test_task, solver, policy)

# Encode the collected frames as an mp4 in the log directory and embed it.
# (A GIF variant, saving to 'slimevolley.gif' via imgs[0].save(..., save_all=True,
# append_images=imgs[1:], duration=40, loop=0), was used here previously.)
mp4_file = os.path.join(log_dir, 'slimevolley.mp4')
imageio.mimsave(mp4_file, imgs, fps=24)
Video(mp4_file)



reward=[0]


  self.pid = _fork_exec(
