In [None]:

import json          
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3
import numpy as np
import sys
from pathlib import Path
from rich.console import Console
# from bokeh.plotting import output_notebook
from IPython.display import HTML                                                                                                                                           
import warnings
import canonical_toolkit as ctk
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Optional path to the run folder to analyse. When left as None (or any falsy
# value) the setup cell below falls back to run_history.csv, and then to
# walking up the directory tree looking for config.py.
DATA_FOLDER = None
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
warnings.filterwarnings("ignore", message="n_jobs value.*overridden.*")
# output_notebook()
console = Console()

# Step out of the analysis/ folder first.
base_folder = Path.cwd().parent

# Resolve two locations:
#   data_path - the run's output folder (holds database.db and the saved config)
#   ea_folder - the ea/ package folder (its parent is put on sys.path below)

# 1. An explicitly configured DATA_FOLDER wins.
if DATA_FOLDER:
    data_path = Path(DATA_FOLDER)
    ea_folder = data_path.parent.parent  # __data__/run_xxx -> ea/

# 2. Otherwise use the last row of run_history.csv in the parent (ea/) folder.
elif (base_folder / "run_history.csv").exists():
    df = pd.read_csv(base_folder / "run_history.csv")
    data_path = base_folder / df["output_folder"].iloc[-1]
    ea_folder = base_folder

# 3. Otherwise assume the notebook is inside output_folder/analysis/ and walk
#    up the tree until a folder containing config.py is found.
else:
    ea_folder = base_folder
    while ea_folder != ea_folder.parent:
        if (ea_folder / "config.py").exists():
            break
        ea_folder = ea_folder.parent
    else:
        # The loop ran out of parents (reached the filesystem root) without a match.
        raise FileNotFoundError("Could not find ea/config.py in any parent directory")
    data_path = base_folder  # output_folder is parent of analysis/

# Make `ea` importable; this import has to come after the path has been resolved.
sys.path.insert(0, str(ea_folder.parent))
from ea.config import Config

print(data_path)
# Close the SQLite connection as soon as the table is loaded instead of leaving
# an open handle on database.db for the lifetime of the kernel.
connection = sqlite3.connect(data_path / "database.db")
try:
    data = pd.read_sql("SELECT * FROM individual", connection)
finally:
    connection.close()
config = Config.load(data_path)
config.large_description()

In [None]:
# Load the similarity archive from the feature frames and the database stored
# under config.OUTPUT_FOLDER.
# NOTE(review): `data` above is read from data_path / "database.db"; presumably
# config.OUTPUT_FOLDER points at that same folder — confirm.
archive = ctk.SimilarityArchive.load(
    frame_folder_path=f"{config.OUTPUT_FOLDER}/feature_frames",
    db_file_path=f"{config.OUTPUT_FOLDER}/database.db"
)

In [None]:
def _decode_tags(raw):
    """Decode a JSON string into a Python object; pass non-string values through unchanged."""
    if isinstance(raw, str):
        return json.loads(raw)
    return raw


def _lifespan(row):
    """List every generation from the row's time_of_birth to its time_of_death, both inclusive."""
    first_gen = int(row['time_of_birth'])
    final_gen = int(row['time_of_death'])
    return list(range(first_gen, final_gen + 1))


# Spread each decoded tag payload into its own columns and attach them to `data`.
tags_expanded = data['tags_'].apply(_decode_tags).apply(pd.Series)
data = pd.concat([data, tags_expanded], axis=1)

# One list of lived generations per individual; exploded into rows later on.
data['gen'] = data.apply(_lifespan, axis=1)

In [None]:
# Long format: one row per (individual, generation) pair, with the trailing
# underscores dropped from the column names, ordered by generation and then by
# ctk_string. `rank` numbers the rows inside each generation in that order.
gen_df = (
    data
    .explode('gen')
    .rename(columns={'fitness_': 'fitness', 'genotype_': 'genotype', 'tags_': 'tags'})
    .sort_values(['gen', 'ctk_string'])
    .assign(rank=lambda frame: frame.groupby('gen').cumcount())
    .set_index(['gen', 'rank'])
)

# Keep only the rows whose individual dies strictly after that row's generation.
survivors_df = (
    gen_df
    .reset_index()
    .loc[lambda frame: frame['time_of_death'] > frame['gen']]
    .set_index(['gen', 'rank'])
)

In [None]:
# Preview the first rows of the survivors table as a sanity check.
survivors_df.head()

In [None]:
# The thumbnail plots below need per-individual image data. `Series.empty` is
# only True for a zero-length column, so a column full of nulls slipped through
# and a missing column surfaced as a bare KeyError. Check for both explicitly;
# `.isna().all()` is also True for a zero-length column, so the original case
# is still covered.
if 'image' not in gen_df.columns or gen_df['image'].isna().all():
    raise AttributeError("This interactive notebook requires img data")

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Generation to plot: NUM_GENERATIONS - 1, presumably the index of the final
# generation when generations are counted from 0 — confirm.
plot_generation = config.NUM_GENERATIONS-1
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Normalise novelty over the range seen among survivors, then look each value
# up in the rainbow colormap.
novelty_values = survivors_df['novelty']
norm_nov = mcolors.Normalize(vmin=novelty_values.min(), vmax=novelty_values.max())
cmap_nov = plt.get_cmap('rainbow')


def _novelty_hex(value):
    """Return the hex colour the rainbow colormap assigns to one novelty value."""
    return mcolors.to_hex(cmap_nov(norm_nov(value)))


# One hex colour per survivor row.
survivors_df['novelty_color'] = list(map(_novelty_hex, novelty_values))

# One colour per individual id; the first occurrence of each id is kept.
unique_colors_df = survivors_df[['id', 'novelty_color']].drop_duplicates(subset='id')

In [None]:
# Controls the generation stride used when sampling generations to embed
# (stride = NUM_GENERATIONS // num_samples).
num_samples = 3
# n_neighbors passed to the UMAP configuration.
umap_n = 5

In [None]:
# Base size for the Bokeh plots; plot_height and plot_width are set to CELL_SIZE * 5.
CELL_SIZE = 150
# Value assigned to the Bokeh plotter's config.default_size.
DEFAULT_SIZE = 10

# Alias for the full, unfiltered archive.
everyone = archive

# Alive-only view of the archive, requested with inplace=False
# (presumably a filtered copy that leaves `archive` untouched — confirm).
alive = archive.alive_only(inplace=False)


### r [AGG] space [AGG]

In [None]:
# Cumulative, aggregated view of the alive-only archive; the UMAP grid is
# sized from it and later fitted on it.
to_fit_rsagg = alive.map('to_cumulative').map('aggregate')

# Transformer grid shaped like the first entry's frame.
grid = ctk.TransformerGrid(to_fit_rsagg[0].grab_frame().shape)
# Seeds are pinned and n_jobs is 1, presumably to keep the embedding
# reproducible between runs — confirm against UmapConfig.
umap = ctk.UmapConfig(
    n_neighbors=umap_n,
    random_state=43,
    transform_seed=43,
    n_jobs=1
).get_umap()
print(umap)
# Assign this UMAP transformer to every cell of the grid.
grid[:,:] = umap

In [None]:
# Fit the transformer grid on the subset returned by new_only
# (presumably the individuals that are new in each generation — confirm).
subset = to_fit_rsagg.new_only(inplace=False)
subset.fit_grid(grid)

In [None]:
# Sample the third axis at a fixed stride so that roughly `num_samples`
# entries are embedded. The stride is clamped to at least 1: with fewer
# generations than samples the integer division yields 0, and a zero slice
# step is invalid for Python sequences.
generation_stride = max(1, config.NUM_GENERATIONS // num_samples)
to_embed = to_fit_rsagg[:, :, ::generation_stride]
to_embed.transform_grid(grid)

1 gen [END], all cumul r [agg], 1 space [agg]

In [None]:
# Index 2 on the first axis, everything on the second, and the LAST entry on
# the third axis (the axis that was sampled with a generation stride above).
to_plot_embed = to_embed[2, :, -1]


umap_plotter = ctk.BokehGridPlotter()

# Draw thumbnails as markers and hide axis decorations.
umap_plotter.config.image_markers = True
umap_plotter.config.clean_axes = True

umap_plotter.config.default_size = DEFAULT_SIZE
umap_plotter.config.plot_height = CELL_SIZE * 5
umap_plotter.config.plot_width = CELL_SIZE * 5
umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(),
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids()
)
umap_plotter.add_id_thumbnails(ids=gen_df['id'], b64s=gen_df['image'])
# umap_plotter.apply_jitter()
# Colour every individual by its novelty colour.
umap_plotter.add_id_styling(ids=unique_colors_df['id'], colors=unique_colors_df['novelty_color'])
umap_plotter.transpose()
# The title used to say [START] although the slice above takes the last entry
# (index -1), which is what the markdown label of this section ([END]) describes.
umap_plotter.show(super_title=f"1 gen [END], all cumul r [agg], 1 space [agg],  umap_n {umap_n}")

In [None]:
import copy

# Hand-written morphology strings for the first injection batch.
new_strings = [
    "C[4-(HBH2B)]",
    "C[fb(H2B)l(HBHB[rl(H2B)])]",
    "C[f(H2B)b(H2H2BH2B)l(HBHB[rl(H2B)])]",
    "C[l(H2B)fbr(HBH2BH2B)]",
    "C[f(H2BH2BH2BH2BH2BH2BH2BH2)]",
]

# Morphology strings for the second injection batch (injected in a later cell).
new_strings_2 = [
      "C[l(H2H3)b(H3)f(H4)]<b(H2H1H7H3)t(H4HH3H3)>",
      "C[b(B7)r(B[r(B4[t(B2)b(H7)])]B1B2)]",
      "C[l(H7H2B5[b(H3)]H4)]<b(HH2B5[r(B5)b(H1)]H7)>",
      "C[f(B5[b(B1[r(H2)])r(H3)]B7[l(B3[b(H4)])])l(B7[r(B5[l(B6[l(H6)])])]B5)]",
      "C[l(H3H)f(H5)]<b(B4)t(H6)>",
      "C[f(B2)l(H1)r(H1B1[l(B1)r(B1[r(B2[l(B7[l(H7)])])l(B7)])])]<b(H)t(H3)>",
      "C[f(H2)r(H6)l(H7B7B4)b(HH6HH6)]<b(H5)>",
      "C[f(B3[b(B7B[l(B1[r(B4)])b(B4[t(H6B5[t(HH)])])t(H7)])])]<b(H6H1)>",
      "C[r(H1B4[t(B1[r(B5[t(B3[t(H6)])b(HB3)])])])]<b(B4)>",
      "C[f(H7)r(HB7[l(B2)b(B2[r(B5[t(B1[t(H7H3)])])])])]<b(H1)>",
      "C[f(B[t(H6B1B1[r(H7B3[l(HH6)]B7)]B1H2)]H7H4H4)br(H4)]",
      "C[l(H2B1[t(B7[b(B2H7H3B3[t(H4)])l(B6)]H7H3)]H5)]<t(H3)>",
      "C[r(H4H7B6[t(H2)])f(H5)]<b(H7H3H5B2[t(B1[l(B7)]B)b(H7)])>",
      "C[b(H2)]<b(B6H4B[t(B7[l(H4)]H2)]H4B1[r(B3[r(B6)l(B6[t(B4)])])])>",
      "C[r(B1[t(H2H1)])l(B2H1)f(H4H1)]<b(B7[t(B5[r(H4)t(H6)])r(H4)])>",
  ]

# 1. Parse into node trees & build a feature frame
new_nodes = [ctk.node_from_string(s) for s in new_strings]
# One similarity-space config per entry of config.SAVE_SPACES; plain strings
# are wrapped in ctk.Space first.
sim_configs = [
    ctk.SimilaritySpaceConfig(
        space=ctk.Space(s) if isinstance(s, str) else s,
        max_hop_radius=config.MAX_HOP_RADIUS,
        skip_empty=config.SKIP_EMPTY
    )
    for s in config.SAVE_SPACES
]

# One series per similarity-space config, combined into a single frame.
new_series = [
    ctk.series_from_node_population(new_nodes, space_config=sc)
    for sc in sim_configs
]
new_frame = ctk.SimilarityFrame(series=new_series)

# 2. Deep copy the frames and both mappers so the injection does not mutate
#    the original archive
injected_archive = archive.replace(
    frames=copy.deepcopy(archive.frames),
    id_mapper=copy.deepcopy(archive._id_mapper),
    alive_mapper=copy.deepcopy(archive._alive_mapper),
)

# 3. Find the last generation and assign new global IDs
last_gen = injected_archive.gens[-1]
last_frame_idx = len(injected_archive._frames) - 1

# Global IDs must not collide with existing ones, so start right after the
# largest ID found in any generation's mapper
max_existing_id = max(
    gid
    for mapper in injected_archive._id_mapper.values()
    for gid in mapper.values()
)
new_global_ids = list(range(max_existing_id + 1, max_existing_id + 1 + len(new_strings)))

# 4. Stack the new frame onto the last generation's frame using |
injected_archive._frames[last_frame_idx] = (
    injected_archive._frames[last_frame_idx] | new_frame
)

# 5. Extend the id_mapper: append new local indices -> new global IDs
# NOTE(review): assumes the existing local indices are 0..n-1, so that
# len(mapper) is the next free local index — confirm.
existing_count = len(injected_archive._id_mapper[last_gen])
for i, gid in enumerate(new_global_ids):
    injected_archive._id_mapper[last_gen][existing_count + i] = gid

# 6. Mark them as alive in the alive_mapper
for gid in new_global_ids:
    injected_archive._alive_mapper[last_gen][gid] = True

print(f"Injected {len(new_strings)} robots with IDs {new_global_ids}")
print(f"Last gen frame shape: {injected_archive._frames[last_frame_idx].shape}")

# The cells below run the same pipeline on injected_archive.


In [None]:
                                                                                                        
# grid_inj = ctk.TransformerGrid(to_fit_rsagg_inj[0].grab_frame().shape)                                 
# umap_inj = ctk.UmapConfig(                                                                             
#     n_neighbors=umap_n,                                                                                
#     random_state=43,                                                                                   
#     transform_seed=43,                                                                                 
#     # min_dist=0.5,                                                                                      
#     n_jobs=1                                                                                           
# ).get_umap()                                                                                           
# grid_inj[:, :] = umap_inj                                                                              
                                                                                                        
# subset_inj = to_fit_rsagg_inj.new_only(inplace=False)                                                  
# subset_inj.fit_grid(grid_inj)                                                                          
                                                                                                        
# to_embed_inj = to_fit_rsagg_inj[:, :, -1]                       
# # Also include the last gen (which has our injected robots)                                            
# # to_embed_inj = to_fit_rsagg_inj[:, :, -1:]                                                             
# to_embed_inj.transform_grid(grid_inj)                                                                  
      

In [None]:
from io import BytesIO
import base64
from canonical_toolkit.morphology.visual.utils import center_on_canvas


def render_ctk_string(ctk_string: str, size: int = 140, quality: int = 80) -> str:
    """Render a morphology string as a base64 WebP data URI.

    The string is parsed into a node tree, converted to a graph, drawn with
    ``ctk.quick_view``, passed through ``center_on_canvas`` and encoded as WebP.

    Args:
        ctk_string: Morphology string accepted by ``ctk.node_from_string``.
        size: Width and height passed to ``ctk.quick_view``. Defaults to 140,
            the value that used to be hard-coded.
        quality: WebP quality passed to ``img.save``. Defaults to 80, the
            value that used to be hard-coded.

    Returns:
        A ``data:image/webp;base64,...`` string.
    """
    graph = ctk.node_from_string(ctk_string).to_graph()
    # NOTE(review): white_background and remove_background are both True;
    # confirm how quick_view combines them.
    img = ctk.quick_view(
        graph,
        return_img=True,
        white_background=True,
        remove_background=True,
        width=size,
        height=size,
        tilted=True,
    )
    img = center_on_canvas(img)
    # Encode in memory; nothing is written to disk.
    buffer = BytesIO()
    img.save(buffer, format="WEBP", quality=quality)
    b64 = base64.b64encode(buffer.getvalue()).decode("ascii")
    return f"data:image/webp;base64,{b64}"


# Thumbnails for the first injection batch, in the same order as new_strings.
new_b64s = [render_ctk_string(s) for s in new_strings]

In [None]:
# Second injection batch, added to the already-injected archive.
# NOTE(review): this cell modifies injected_archive in place, so running it
# twice stacks the batch twice; re-run the batch-1 cell first to reset.

# Features: one similarity series per space config, combined into one frame.
new_nodes_2 = [ctk.node_from_string(robot_string) for robot_string in new_strings_2]
new_series_2 = [
    ctk.series_from_node_population(new_nodes_2, space_config=space_config)
    for space_config in sim_configs
]
new_frame_2 = ctk.SimilarityFrame(series=new_series_2)

# Stack the batch onto the last generation's frame.
last_gen_frame = injected_archive._frames[last_frame_idx]
injected_archive._frames[last_frame_idx] = last_gen_frame | new_frame_2

# New global IDs continue after the largest ID currently in any mapper
# (which already includes batch 1).
max_id_2 = max(
    gid
    for mapper in injected_archive._id_mapper.values()
    for gid in mapper.values()
)
first_id_2 = max_id_2 + 1
new_global_ids_2 = list(range(first_id_2, first_id_2 + len(new_strings_2)))

# Register each new robot under the next free local index and mark it alive.
existing_count_2 = len(injected_archive._id_mapper[last_gen])
for local_idx, gid in enumerate(new_global_ids_2, start=existing_count_2):
    injected_archive._id_mapper[last_gen][local_idx] = gid
    injected_archive._alive_mapper[last_gen][gid] = True

# Thumbnails, in the same order as new_strings_2.
new_b64s_2 = [render_ctk_string(robot_string) for robot_string in new_strings_2]

print(f"Injected {len(new_strings_2)} more robots with IDs {new_global_ids_2}")

In [None]:
# Same preprocessing chain as for the original archive (alive-only, then
# 'to_cumulative', then 'aggregate'), applied to the injected archive.
alive_inj = injected_archive.alive_only(inplace=False)
to_fit_rsagg_inj = alive_inj.map('to_cumulative').map('aggregate')


In [None]:
# Take the last entry on the third axis — presumably the last generation,
# which is the one that received the injected robots; confirm the axis order.
to_embed_inj = to_fit_rsagg_inj[:, :, -1]
# Alternative slice, kept for reference:
# to_embed_inj = to_fit_rsagg_inj[:, :, -1:]
# Reuses `grid`, which was fitted on the original (non-injected) archive.
to_embed_inj.transform_grid(grid)

In [None]:
                                                                                                  
# Interactive plot of the injected embedding: existing robots in their novelty
# colours, batch 1 in red and batch 2 in cyan.

to_plot_embed = to_embed_inj[2, :, -1]

umap_plotter = ctk.BokehGridPlotter()
umap_plotter.config.image_markers = True
umap_plotter.config.clean_axes = True
umap_plotter.config.default_size = DEFAULT_SIZE
umap_plotter.config.plot_height = CELL_SIZE * 5
umap_plotter.config.plot_width = CELL_SIZE * 5

umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(),
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids()
)
umap_plotter.add_id_thumbnails(ids=gen_df['id'], b64s=gen_df['image'])

# Style existing robots with novelty colors
umap_plotter.add_id_styling(ids=unique_colors_df['id'], colors=unique_colors_df['novelty_color'])

# Batch 1: thumbnails plus bright red styling so the robots stand out.
# The size list is derived from the batch length (it used to be a literal list
# of five entries) so it stays in step with `colors` when new_strings changes.
umap_plotter.add_id_thumbnails(ids=new_global_ids, b64s=new_b64s)
umap_plotter.add_id_styling(
    ids=new_global_ids,
    colors=['#FF0000'] * len(new_global_ids),
    sizes=[30] * len(new_global_ids)
)

# Batch 2: thumbnails plus cyan styling.
umap_plotter.add_id_thumbnails(ids=new_global_ids_2, b64s=new_b64s_2)
umap_plotter.add_id_styling(
    ids=new_global_ids_2,
    colors=['#00FFFF'] * len(new_global_ids_2),
    sizes=[30]*len(new_global_ids_2)
)

umap_plotter.transpose()
# The title now names both batches; it used to mention only the red one.
umap_plotter.show(
    super_title=(
        f"Last gen + {len(new_strings)} injected robots (red) "
        f"+ {len(new_strings_2)} injected robots (cyan), umap_n {umap_n}"
    )
)
                        

In [None]:
from matplotlib.cm import ScalarMappable  

In [None]:
# Static (matplotlib) version of the injected embedding, taking the first
# three entries on the first axis.
to_plot_embed = to_embed_inj[:3, :, -1]

umap_plotter = ctk.GridPlotter()
umap_plotter.config.keep_global_scale = False

umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(),
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids(),
)

# Existing robots: novelty colors, faded
umap_plotter.add_id_styling(
    ids=unique_colors_df['id'],
    colors=unique_colors_df['novelty_color'],
    alphas=[0.3] * len(unique_colors_df['id'])
)

# Batch 1: red, nearly opaque (alpha 0.9), larger
umap_plotter.add_id_styling(
    ids=new_global_ids,
    colors=['#FF0000'] * len(new_global_ids),
    sizes=[30] * len(new_global_ids),
    alphas=[0.9] * len(new_global_ids)
)

# Batch 2: blue, nearly opaque (alpha 0.9), larger
umap_plotter.add_id_styling(
    ids=new_global_ids_2,
    colors=['blue'] * len(new_global_ids_2),
    sizes=[30] * len(new_global_ids_2),
    alphas=[0.9] * len(new_global_ids_2)
)

umap_plotter.config.col_space = 0
umap_plotter.config.row_space = 0.2
umap_plotter.transpose()

# Strip ticks and tick labels from every cell.
umap_plotter[:, :].set_xticks([])
umap_plotter[:, :].set_yticks([])
umap_plotter[:, :].set_xticklabels([])
umap_plotter[:, :].set_yticklabels([])
# Batch 2 is drawn in blue in this figure; the title used to say cyan.
umap_plotter.suptitle(f'Last gen + injected robots (red/blue), umap_n {umap_n}')

umap_plotter.config.margin = (0.5, 0.2, 0, 0)
                                                                                                        
import matplotlib.patheffects as pe
# Optional: pip install adjustText
# from adjust_text import adjust_text

# Grab the axes of the first grid cell together with the matching entry
# ([0][0]) of the embedding's 2-D data and ids.
# NOTE(review): presumably plot_data is an (n_points, 2) array whose rows line
# up with plot_ids — confirm against to_plot_embed.
ax = umap_plotter._cells.flatten()[0].ax
plot_data = to_plot_embed.get_2d_data()[0][0]
plot_ids = list(to_plot_embed.get_2d_ids()[0][0])

# Collects the created label artists so an optional adjust_text pass can use them.
texts = []

def annotate_batch(ids, start_num, ring_color='white'):
    """Write a numeric label next to each id of ``ids`` on the first subplot.

    Relies on the module-level ``ax``, ``plot_data`` and ``plot_ids``; every
    text artist created here is appended to the module-level ``texts`` list.

    Args:
        ids: ids to label. An id that is not in ``plot_ids`` is skipped, but
            its position still consumes a number.
        start_num: number shown for the first entry of ``ids``; later entries
            count up from it by position.
        ring_color: currently unused — the highlight-ring scatter that
            consumed it is disabled.
    """
    for offset, gid in enumerate(ids):
        if gid not in plot_ids:
            continue

        row = plot_ids.index(gid)
        px = plot_data[row, 0]
        py = plot_data[row, 1]
        caption = str(start_num + offset)

        # (A ring marker drawn around the point used to live here; it is disabled.)

        # Black text with a thin white outline so it stays legible over markers.
        halo = [pe.withStroke(linewidth=1, foreground='white')]
        label_artist = ax.text(
            px, py, caption,
            fontsize=5,
            fontweight='bold',
            color='black',
            va='center',
            ha='center',
            zorder=101,
            path_effects=halo,
        )
        texts.append(label_artist)

# Label both injected batches on the first subplot: the first batch is numbered
# from 1, the second from 6.
# NOTE(review): the hard-coded 6 presumably assumes new_global_ids holds five
# ids — confirm, otherwise the numbering will overlap or leave a gap.
for batch_ids, first_label in ((new_global_ids, 1), (new_global_ids_2, 6)):
    annotate_batch(batch_ids, first_label)

# Optional de-overlap pass; needs adjustText installed and its import enabled:
# adjust_text(texts, arrowprops=dict(arrowstyle='->', color='black', lw=0.5))

# Resize the figure, switch off the shared global scale, then redraw the canvas.
umap_plotter._fig.set_size_inches(6, 6)
umap_plotter.config.keep_global_scale = False
umap_plotter._fig.canvas.draw()

import matplotlib.patheffects as pe
# from adjust_text import adjust_text

# Figure size and scaling mode for the whole grid.
umap_plotter._fig.set_size_inches(8, 8)
# umap_plotter.transpose()
umap_plotter.config.keep_global_scale = False

# Nested per-subplot containers from the embedding (outer level iterated first,
# inner level second). The original author describes them as a list of lists
# of arrays (data) and a list of lists of lists (ids).
all_plot_data = to_plot_embed.get_2d_data()
all_plot_ids = to_plot_embed.get_2d_ids()

# One entry per subplot, in flattened order.
cells = umap_plotter._cells.flatten()

# Flatten data and ids in the same nested order so that entry i is used for
# cells[i].
# NOTE(review): umap_plotter.transpose() was called earlier in this notebook;
# confirm the flattened cell order still lines up with this data order.
flat_data = []
for data_row in all_plot_data:
    flat_data.extend(data_row)

flat_ids = []
for id_row in all_plot_ids:
    flat_ids.extend(list(cell_ids) for cell_ids in id_row)

def annotate_subplot(ax, plot_data, plot_ids):
    """Number the injected ids on one subplot and return the created labels.

    Ids come from the module-level ``new_global_ids`` (numbered from 1) and
    ``new_global_ids_2`` (numbered from 6). An id missing from ``plot_ids`` is
    skipped, but its position still consumes a number.

    Args:
        ax: axes to draw the labels on.
        plot_data: indexable as ``plot_data[row, 0]`` / ``plot_data[row, 1]``
            to obtain the x / y position of a point.
        plot_ids: list of ids; an id's position in this list is the row used
            to index ``plot_data``.

    Returns:
        list: the text artists created on ``ax``, in creation order.
    """
    created = []

    for batch, first_label in ((new_global_ids, 1), (new_global_ids_2, 6)):
        for offset, gid in enumerate(batch):
            if gid not in plot_ids:
                continue

            row = plot_ids.index(gid)
            px = plot_data[row, 0]
            py = plot_data[row, 1]

            # Tiny black text with a white outline so it reads over the markers.
            label_artist = ax.text(
                px, py, str(first_label + offset),
                fontsize=2,
                fontweight='bold',
                color='black',
                va='center',
                ha='center',
                zorder=101,
                path_effects=[pe.withStroke(linewidth=1, foreground='white')],
            )
            created.append(label_artist)

    return created

# 3. Apply to EVERY subplot: cells, flat_data and flat_ids are matched by position.
for i, cell in enumerate(cells):
    subplot_texts = annotate_subplot(cell.ax, flat_data[i], flat_ids[i])

    # Optional: If you have adjust_text, use it per subplot
    # adjust_text(subplot_texts, ax=cell.ax, arrowprops=dict(arrowstyle='-', color='black', lw=0.5))

# Redraw with the new labels, then display the finished figure once.
umap_plotter._fig.canvas.draw()
umap_plotter.show()