# CoDA (entropy discovery) — cued gridworld demo

Behavior:
- **Interest discovery** via transition entropy over *next observations* (no reward assumed)
- **One split decision per episode**
- When a cue is chosen, do a **zipper sweep over the episode suffix**:
  clone all one-step successors that have been experienced (counts exist) and redirect.
- **Merge** via soft deactivation when cue utility drops.


In [None]:
!pip -q install numpy networkx matplotlib

In [None]:
from coda_entropy_zipper_core import CoDAEntropyZipperAgent, CoDAEntropyConfig, EligibilityTraceConfig, DiscoveryConfig
from coda_envs import CuedGridworldEnv
from coda_graph_viz import grid_positions, save_snapshot_sequence, plot_snapshot
import os


In [None]:
# Build the two nested sub-configurations first, then combine them into the
# top-level agent configuration.
trace_cfg = EligibilityTraceConfig(gamma=0.95, lam=0.9, mode='first_visit')
discovery_cfg = DiscoveryConfig(mode='entropy', min_sa_count=10.0, entropy_threshold=0.45)

cfg = CoDAEntropyConfig(
    et=trace_cfg,
    disc=discovery_cfg,
    n_threshold=5.0,
    theta_split=0.80,   # presumably the cue-utility level that triggers a split -- confirm in core module
    theta_merge=0.40,   # presumably the cue-utility level below which a cue is merged -- confirm
    zipper_sweep_only_when_new_cue=True,
)

# The agent is created from the configuration alone; the environment is a
# 5x5 cued gridworld with a fixed seed.
agent = CoDAEntropyZipperAgent(cfg)
env = CuedGridworldEnv(rows=5, cols=5, cue_cell=14, seed=0)

print('cue_cell:', env.cue_cell)


In [None]:
# Run settings: total number of episodes and how often to record a snapshot.
n_episodes = 300
snap_every = 25
snapshots = []

for ep in range(n_episodes):
    # Sample one episode from the environment and feed it to the agent.
    obs_seq, act_seq = env.sample_episode()
    diag = agent.process_episode(obs_seq, act_seq)

    # Only every `snap_every`-th episode (starting with ep=0) is recorded.
    if ep % snap_every != 0:
        continue
    snapshots.append(agent.snapshot(title=f'Graph at iteration {len(snapshots)} (ep={ep})'))

# Always record the graph as it stands after the last episode.
final_snap = agent.snapshot(title=f'Graph at iteration final (ep={n_episodes})')
snapshots.append(final_snap)

# Cell output: diagnostics of the last processed episode, plus the number of
# graph nodes and the size of `agent.salient`.
(diag, (len(agent.G.nodes), len(agent.salient)))


In [None]:
# Layout positions for a 5x5 grid (matching the rows/cols passed to the
# environment above); `pos` is reused when saving the snapshot sequence.
pos = grid_positions(5, 5, obs_start=1)

# Draw the final snapshot on the grid layout. The trailing semicolon keeps the
# notebook from echoing the call's return value.
plot_snapshot(
    final_snap,
    layout='grid',
    obs_pos=pos,
    node_label='sid',
    title=final_snap.title,
);


In [None]:
# Directory that receives one image per recorded snapshot.
out_dir = 'cued_entropy_steps'

# Make sure the target directory exists before writing into it. Whether
# save_snapshot_sequence creates it on its own is not visible from here, and
# this call is a no-op when the directory is already present.
os.makedirs(out_dir, exist_ok=True)

paths = save_snapshot_sequence(
    snapshots,
    out_dir,
    prefix='cued_iter',
    layout='grid',
    obs_pos=pos,
    node_label='sid',
)

# Cell output: preview of the first few returned paths.
paths[:3], '...'
