---

In [None]:
import json          
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3
import numpy as np
import sys
from pathlib import Path
from rich.console import Console
from bokeh.plotting import output_notebook

import warnings
import canonical_toolkit as ctk

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Folder of the run to analyse. When left falsy (None), the loading cell falls
# back to the `output_folder` value of the last row in run_history.csv.
DATA_FOLDER = None
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Hide warnings whose message matches "n_jobs value ... overridden".
warnings.filterwarnings("ignore", message="n_jobs value.*overridden.*")
output_notebook()
console = Console()

# Put the parent of the working directory on sys.path so the local `ea` package
# can be imported; guarded so re-running this cell does not stack duplicates.
project_root = str(Path.cwd().parent)
if project_root not in sys.path:
    sys.path.insert(0, project_root)
from ea.config import Config

# No folder chosen in the config cell: use the last run listed in run_history.csv.
if not DATA_FOLDER:
    df = pd.read_csv('run_history.csv')
    if df.empty:
        raise ValueError("run_history.csv contains no runs; set DATA_FOLDER explicitly")
    DATA_FOLDER = df['output_folder'].iloc[-1]

print(DATA_FOLDER)

# sqlite3.connect() creates a new empty database when the file is missing, which
# would surface later as a confusing "no such table" error - fail early instead.
database_path = Path(DATA_FOLDER) / "database.db"
if not database_path.is_file():
    raise FileNotFoundError(f"No database found at {database_path}")

# Close the connection as soon as the table is in memory (the original leaked it).
connection = sqlite3.connect(database_path)
try:
    data = pd.read_sql("SELECT * FROM individual", connection)
finally:
    connection.close()

config = Config.load(DATA_FOLDER)
config.large_description()

In [None]:
def _decode_tags(raw):
    """Parse a JSON-encoded tag string; any non-string value is passed through as-is."""
    if isinstance(raw, str):
        return json.loads(raw)
    return raw


def _generations_alive(row):
    """List every generation from time_of_birth through time_of_death (inclusive)."""
    first_gen = int(row['time_of_birth'])
    last_gen = int(row['time_of_death'])
    return list(range(first_gen, last_gen + 1))


# Expand the decoded tags into their own columns and attach them to the table.
tags_expanded = data['tags_'].apply(_decode_tags).apply(pd.Series)
data = pd.concat([data, tags_expanded], axis=1)

# One list of generations per individual; exploded below into one row per
# (individual, generation) pair.
data['gen'] = data.apply(_generations_alive, axis=1)

column_renames = {'fitness_': 'fitness', 'genotype_': 'genotype', 'tags_': 'tags'}
gen_df = data.explode('gen')
gen_df = gen_df.rename(columns=column_renames)
gen_df = gen_df.sort_values(['gen', 'ctk_string'], ascending=[True, True])

# Rank = running position inside each generation, following the sort above.
gen_df['rank'] = gen_df.groupby('gen').cumcount()
gen_df = gen_df.set_index(['gen', 'rank'])
gen_df.head()

In [None]:
# Rows whose individual is still alive after this generation
# (its time_of_death lies strictly beyond the row's generation).
generation_rows = gen_df.reset_index()
outlives_generation = generation_rows['time_of_death'] > generation_rows['gen']
survivors_df = generation_rows.loc[outlives_generation].set_index(['gen', 'rank'])
survivors_df.head()

In [None]:
# Rows whose generation equals the individual's time_of_death,
# i.e. the generation in which it leaves the population.
generation_rows = gen_df.reset_index()
dies_this_generation = generation_rows['time_of_death'] == generation_rows['gen']
killed_df = generation_rows.loc[dies_this_generation].set_index(['gen', 'rank'])
killed_df.head()

In [None]:
temp_df = gen_df.reset_index()
max_gen_limit = int(temp_df['gen'].max())

# Rows at which an individual dies (its last generation in the population).
dead_pool = temp_df[temp_df['time_of_death'] == temp_df['gen']].copy()

# Keep only archived individuals BEFORE expanding them over the remaining
# generations: the original exploded every dead individual first and threw the
# non-archived rows away afterwards. Row order, and therefore the resulting
# ranks, are unchanged. `== True` is deliberate: NaN tags compare as False.
archived_pool = dead_pool[dead_pool['archived'] == True].copy()

# From the generation of death up to the last generation of the run (inclusive).
# A list comprehension (rather than a row-wise apply) also works on an empty pool.
archived_pool['gen'] = [
    list(range(int(death_gen), max_gen_limit + 1))
    for death_gen in archived_pool['gen']
]
archived_df = archived_pool.explode('gen')
archived_df['gen'] = archived_df['gen'].astype(int)

archived_df = archived_df.sort_values(['gen'], ascending=[True])
# Replace the rank carried over from gen_df with the position inside the archive.
archived_df['rank'] = archived_df.groupby('gen').cumcount()
archived_df = archived_df.set_index(['gen', 'rank'])

archived_df.head()

---

### Plot fitness + 'stored values'

In [None]:
from ea.analysis.plot_metrics import plot_metrics

In [None]:
# Fitness is always plotted; novelty and speed only when the run stored them.
optional_metrics = (
    ('novelty', config.STORE_NOVELTY),
    ('speed', config.STORE_SPEED),
)
to_plot = ['fitness'] + [name for name, stored in optional_metrics if stored]

plot_metrics(
    survivors_df,
    metrics=to_plot,
    is_max=config.IS_MAXIMISATION,
)

### Lifespan of the Fittest Individuals

In [None]:
from ea.analysis.plot_top_lifespans import plot_top_lifespans                                                                                                                               

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Forwarded to plot_top_lifespans as its `column` and `top_x` arguments.
column = 'fitness'
top_x = 10
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Lifespan plot for the surviving individuals, driven by the settings cell above.
plot_top_lifespans(
    survivors_df,
    is_maximalisation=config.IS_MAXIMISATION,
    column=column,
    top_x=top_x,
)

In [None]:
from ea.analysis.plot_lifespan_analysis import plot_lifespan_analysis

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# (lower, upper) pair forwarded to plot_lifespan_analysis as `fitness_xlim`.
fitness_lim = (
    0, 
    1
)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Uses the full per-generation table (survivors and killed individuals alike).
plot_lifespan_analysis(
    gen_df,
    fitness_xlim=fitness_lim,
)

---

### Quick Dirty Robot Generation Plotter

In [None]:
from IPython.display import HTML                                                                                                                                           

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Generation shown by the thumbnail cell below. That cell advances this value by
# one (modulo config.NUM_GENERATIONS) every time it runs, so re-run this cell to
# start again from here.
plot_generation = 0
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# One <img> tag per survivor of the selected generation, wrapped in a flex container.
thumbnails = [
    f'<img src="{row["image"]}" title="fit={row["fitness"]:.2f}" style="height:64px">'
    for _, row in survivors_df.loc[plot_generation].iterrows()
]
html = "<div style='display:flex;flex-wrap:wrap;gap:4px'>" + "".join(thumbnails) + "</div>"

print(f'plotting generation {plot_generation}')
# Step to the next generation so that re-running this cell pages through the run.
plot_generation = (plot_generation + 1) % config.NUM_GENERATIONS
HTML(html)


In [None]:
from ea.analysis.high_res_robot_gens import high_res_robot_gens

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Forwarded to high_res_robot_gens as `top_n` (amt) and `n_samples` (gen_samples).
amt = 2
gen_samples = 5
# Column forwarded as `by`, e.g. 'fitness' or 'novelty'.
col_name = 'fitness'
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

plot fittest

In [None]:
# Positive top_n: the `amt` best individuals according to `col_name`.
plotter = high_res_robot_gens(
    gen_df,
    config,
    top_n=amt,
    n_samples=gen_samples,
    by=col_name,
)
plotter.show()

plot least fit

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Forwarded to high_res_robot_gens as `top_n=-amt` and `n_samples=gen_samples`
# by the cell below.
amt = 2
gen_samples = 5
# Column forwarded as `by`, e.g. 'fitness' or 'novelty'.
col_name = 'fitness'
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# The sign of top_n is flipped here to request the least fit individuals instead.
plotter = high_res_robot_gens(
    gen_df,
    config,
    top_n=-amt,
    n_samples=gen_samples,
    by=col_name,
)
plotter.show()

---

### Similarity Frame analysis

In [None]:
def _generation_sort_key(folder):
    """Order `gen_<N>` folders by the integer N; non-numeric suffixes sort last, by name."""
    suffix = folder.name.rsplit("_", 1)[-1]
    if suffix.isdigit():
        return (0, int(suffix), folder.name)
    return (1, 0, folder.name)


# A plain lexicographic sort puts "gen_10" before "gen_2", which would make
# all_feature_frames[g] a different generation than g. Sort on the number instead
# (zero-padded names keep the order they already had).
frame_folders = sorted(
    (config.OUTPUT_FOLDER / "feature_frames").glob("gen_*"),
    key=_generation_sort_key,
)
all_feature_frames = [ctk.SimilarityFrame.load(f) for f in frame_folders]

### Heatmaps

radius cosine heatmap across spaces. show 1 GENERATION

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Generation to inspect: used as the position in all_feature_frames and as the
# `gen` key into gen_df by the cells below.
generation = 9
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# NOTE(review): no cell visible in this part of the notebook reads max_num_inds -
# confirm whether it is still needed.
max_num_inds = 100 # doing more will be laggy for the notebook
# # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# inplace=False: the stored frame is left as loaded and the mapped result is kept separately.
heat_map_show = all_feature_frames[generation].map(
    'cosine_similarity',
    inplace=False,
)

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# interactive: True -> ctk.BokehGridPlotter, False -> ctk.GridPlotter (see plot cells).
interactive = True
# Used as both plot_width and plot_height of the Bokeh plotter config.
cell_size = 150
# Select which part of the frame to plot by slicing.
# NOTE: this overwrites heat_map_show; re-run the mapping cell above to get the
# unsliced frame back before choosing a different slice.
heat_map_show = heat_map_show[:4,:]
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Plain-text view of the sliced frame, as a check before plotting it.
print(heat_map_show)

In [None]:
# Matrices and titles laid out as [series][instance].
data_2d = []
titles = []
for serie in heat_map_show.series:
    data_2d.append([inst.matrix for inst in serie.instances])
    titles.append([f"{serie.label} - Radius {inst.radius}" for inst in serie.instances])

# rank -> individual id for the selected generation; a rank that is missing from
# the map falls back to the rank itself.
id_map = gen_df.loc[generation, 'id'].to_dict()
n = data_2d[0][0].shape[0]
ids_in_order = [id_map.get(position, position) for position in range(n)]

# Cell (i, j) holds the id pair belonging to matrix entry (i, j).
id_pairs = np.empty((n, n), dtype=object)
for i, id_i in enumerate(ids_in_order):
    for j, id_j in enumerate(ids_in_order):
        id_pairs[i, j] = (id_i, id_j)

# The same id grid is shared by every (series, instance) cell of the plot grid.
n_series = len(data_2d)
n_radii = len(data_2d[0])
real_ids = [[id_pairs] * n_radii for _ in range(n_series)]

if not interactive:
    plotter = ctk.GridPlotter()
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        titles_2d=titles,
    )
else:
    plotter = ctk.BokehGridPlotter()
    plotter.add_thumbnails(ids=gen_df['id'], b64s=gen_df['image'])
    plotter.config.plot_width = cell_size
    plotter.config.plot_height = cell_size
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        global_ids_2d=real_ids,
        titles_2d=titles,
    )
    plotter.transpose()
plotter.show()

radius cosine heatmap across GENERATIONS. show 1 RADIUS

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Number of evenly spaced generations to sample (passed to np.linspace as `num`).
# NOTE(review): the annotation allows None, but the cells below pass the value
# straight to np.linspace - confirm whether None is meant to be supported.
num_generations: int | None = 6
# Index applied to every mapped frame (`frame[radius]`) by the cells below.
radius = 1
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Cosine-similarity frames for `num_generations` evenly spaced generations
# between 0 and config.NUM_GENERATIONS (both ends included by linspace).
sampled_generations = np.linspace(0, config.NUM_GENERATIONS, num=num_generations, dtype=int)

cosine_generation_frames = []
for sampled_gen in sampled_generations:
    mapped_frame = all_feature_frames[sampled_gen].map('cosine_similarity', inplace=False)
    cosine_generation_frames.append(mapped_frame)

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# interactive: True -> ctk.BokehGridPlotter, False -> ctk.GridPlotter (see plot cells).
interactive = True
# Used as both plot_width and plot_height of the Bokeh plotter config.
cell_size = 150
# Select what area/ to plot with slicing
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Same evenly spaced generations that cosine_generation_frames was built from.
gen_indices = np.linspace(0, config.NUM_GENERATIONS, num=num_generations, dtype=int)

# Select the configured radius out of every sampled generation's cosine frame.
radius_slices: list[ctk.SimilarityFrame] = [frame[radius] for frame in cosine_generation_frames]

# rank -> individual id, one mapping per sampled generation.
id_gen_maps = [gen_df.loc[gen_idx, 'id'].to_dict() for gen_idx in gen_indices]

# The original cell went on to build `id_gen_pairs` and `real_ids` from `id_map`,
# `id_pairs` and `heat_map_show` left over from the single-generation heatmap.
# Neither result was used (the plotting cell builds its own id pairs per
# generation) and `id_map[i]` could raise KeyError, so that code was removed.

In [None]:
import numpy as np

# Grid layout: one row per sampled generation, one cell per series.
data_2d = []
titles_2d = []
global_ids_2d = []
gen_indices = np.linspace(0, config.NUM_GENERATIONS, num=num_generations, dtype=int)
for gen_idx, radius_frame, id_map in zip(gen_indices, radius_slices, id_gen_maps):
    gen_row = []
    gen_titles = []
    gen_ids = []

    for series in radius_frame.series:
        # Only the first instance is plotted; after the `frame[radius]` selection
        # each series is expected to hold exactly one.
        inst = series.instances[0]
        gen_row.append(inst.matrix)
        gen_titles.append(f"Gen {gen_idx}\n{series.label}")

        # Id pair for every matrix cell. A rank missing from the map falls back to
        # the rank itself (same convention as the single-generation heatmap)
        # instead of raising KeyError.
        n = inst.shape[0]
        ids_in_order = [id_map.get(position, position) for position in range(n)]
        id_pairs = [
            [(id_i, id_j) for id_j in ids_in_order]
            for id_i in ids_in_order
        ]
        gen_ids.append(id_pairs)

    data_2d.append(gen_row)
    titles_2d.append(gen_titles)
    global_ids_2d.append(gen_ids)

if interactive:
    plotter = ctk.BokehGridPlotter()
    plotter.add_thumbnails(ids=gen_df['id'], b64s=gen_df['image'])
    plotter.config.plot_width = cell_size
    plotter.config.plot_height = cell_size
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        global_ids_2d=global_ids_2d,
        titles_2d=titles_2d
    )
    plotter.transpose()
else:
    plotter = ctk.GridPlotter()
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        titles_2d=titles_2d
    )
plotter.show()

radius cosine heatmap across GENERATIONS. show 1 SPACE

### UMAP

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Generation to embed: position in all_feature_frames and `gen` key into gen_df.
generation = 0
# Upper bound of the first-axis slice `[:max_radius, :]` applied to that frame.
max_radius = 3
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Settings handed to the 'umap_embed' mapping as `config=`.
# NOTE(review): n_jobs=-1 presumably means "use all cores" - confirm against ctk.UmapConfig.
umap_config = ctk.UmapConfig(
    n_neighbors=2,
    metric='cosine',
    n_jobs=-1
)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Slice the selected generation's frame first, then embed the slice with UMAP.
# Note that this reuses (and overwrites) the heat_map_show name from the heatmap section.
heat_map_show = all_feature_frames[generation][:max_radius, :].map(
    'umap_embed',
    config=umap_config,
    inplace=False,
)

In [None]:
# Plain-text view of the embedded frame, as a check before plotting it.
print(heat_map_show)

In [None]:
# UMAP embeddings laid out as [series][instance]; each matrix has one row per individual.
data_2d = [
    [inst.matrix for inst in serie.instances]
    for serie in heat_map_show.series
]

# Titles must be rebuilt from the frame being plotted here. The original reused
# `titles` left over from the cosine heatmap cell, which describes a different
# generation and a different slice.
titles = [
    [f"{serie.label} - Radius {inst.radius}" for inst in serie.instances]
    for serie in heat_map_show.series
]

# rank -> individual id for the selected generation.
id_map = gen_df.loc[generation, 'id'].to_dict()
n_points = data_2d[0][0].shape[0]

# Scatter plots take a flat list of ids, one per embedded point (not an n x n
# grid of pairs). A rank missing from the map falls back to the rank itself.
point_ids = [id_map.get(i, i) for i in range(n_points)]

# The same id list is shared by every (series, instance) cell.
real_ids = [
    [point_ids for _ in serie.instances]
    for serie in heat_map_show.series
]
if interactive:
    plotter = ctk.BokehGridPlotter()
    plotter.add_thumbnails(ids=gen_df['id'], b64s=gen_df['image'])
    plotter.config.plot_width = cell_size
    plotter.config.plot_height = cell_size
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        global_ids_2d=real_ids,
        titles_2d=titles
    )
    plotter.transpose()
else:
    plotter = ctk.GridPlotter()
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        titles_2d=titles
    )
plotter.show()

UMAP across generations

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Number of evenly spaced generations to sample (passed to np.linspace as `num`).
# NOTE(review): the annotation allows None, but the cells below pass the value
# straight to np.linspace - confirm whether None is meant to be supported.
num_generations: int | None = 7
# Index applied to every mapped frame (`frame[radius]`) by the cells below.
radius = 1
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Settings handed to the 'umap_embed' mapping as `config=`.
# NOTE(review): n_jobs=-1 presumably means "use all cores" - confirm against ctk.UmapConfig.
umap_config = ctk.UmapConfig(
    n_neighbors=2,
    metric='cosine',
    n_jobs=-1
)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Cosine-similarity frames for the evenly spaced sample of generations.
sampled_generations = np.linspace(0, config.NUM_GENERATIONS, num=num_generations, dtype=int)
cosine_generation_frames = []
for sampled_gen in sampled_generations:
    mapped_frame = all_feature_frames[sampled_gen].map('cosine_similarity', inplace=False)
    cosine_generation_frames.append(mapped_frame)

# NOTE(review): the embedding here is computed on the cosine-similarity frames,
# whereas the single-generation UMAP cell embeds the unmapped feature frame -
# confirm that this difference is intentional.
umap_generations_frames = []
for cosine_frame in cosine_generation_frames:
    embedded = cosine_frame[radius].map('umap_embed', config=umap_config, inplace=False)
    umap_generations_frames.append(embedded)

In [None]:
gen_indices = np.linspace(0, config.NUM_GENERATIONS, num=num_generations, dtype=int)
# rank -> individual id, one mapping per sampled generation.
id_gen_maps = [gen_df.loc[gen, 'id'].to_dict() for gen in gen_indices]

# Grid layout: one row per sampled generation, one cell per series.
data_2d, titles_2d, global_ids_2d = [], [], []
for gen_idx, umap_frame, id_map in zip(gen_indices, umap_generations_frames, id_gen_maps):
    # Only the first instance of every series is plotted.
    first_instances = [(series.label, series.instances[0]) for series in umap_frame.series]

    data_2d.append([inst.matrix for _, inst in first_instances])
    titles_2d.append([f"Gen {gen_idx}\n{label}" for label, _ in first_instances])
    # Scatter plots take a flat id list per cell: one id per embedded point,
    # falling back to the rank when it is missing from the map.
    global_ids_2d.append([
        [id_map.get(i, i) for i in range(inst.matrix.shape[0])]
        for _, inst in first_instances
    ])

if not interactive:
    plotter = ctk.GridPlotter()
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        titles_2d=titles_2d,
    )
else:
    plotter = ctk.BokehGridPlotter()
    plotter.add_thumbnails(ids=gen_df['id'], b64s=gen_df['image'])
    plotter.config.plot_width = cell_size
    plotter.config.plot_height = cell_size
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        global_ids_2d=global_ids_2d,
        titles_2d=titles_2d,
    )
    plotter.transpose()
plotter.show()

In [None]:
# One UMAP plot per series, averaging the embeddings of all its instances.
#
# Start from empty grids. The original reset gen_row/gen_titles/gen_ids here
# instead, so this cell appended to the lists filled by the previous cell: the
# earlier plots were drawn again and the grid grew on every re-run.
data_2d = []
titles_2d = []
global_ids_2d = []
for gen_idx, umap_frame, id_map in zip(gen_indices, umap_generations_frames, id_gen_maps):
    gen_row = []
    gen_titles = []
    gen_ids = []

    for series in umap_frame.series:
        # Element-wise mean over the instances of this series. With a single
        # instance per series this is simply that instance's embedding.
        all_embeddings = [inst.matrix for inst in series.instances]
        avg_embedding = np.mean(all_embeddings, axis=0)

        gen_row.append(avg_embedding)
        gen_titles.append(f"Gen {gen_idx}\n{series.label} (avg)")

        # Flat id list, one id per embedded point; a rank missing from the map
        # falls back to the rank itself.
        n_points = avg_embedding.shape[0]
        point_ids = [id_map.get(i, i) for i in range(n_points)]
        gen_ids.append(point_ids)

    data_2d.append(gen_row)
    titles_2d.append(gen_titles)
    global_ids_2d.append(gen_ids)


if interactive:
    plotter = ctk.BokehGridPlotter()
    plotter.add_thumbnails(ids=gen_df['id'], b64s=gen_df['image'])
    plotter.config.plot_width = cell_size
    plotter.config.plot_height = cell_size
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        global_ids_2d=global_ids_2d,
        titles_2d=titles_2d
    )
    plotter.transpose()
else:
    plotter = ctk.GridPlotter()
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        titles_2d=titles_2d
    )
plotter.show()

In [None]:
# Cosine heatmaps for the currently configured sample of generations.
data_2d = []
titles_2d = []
global_ids_2d = []
gen_indices = np.linspace(0, config.NUM_GENERATIONS, num=num_generations, dtype=int)

# Rebuild the per-generation inputs from gen_indices. The original zipped these
# indices with `radius_slices` computed earlier for a different num_generations,
# so matrices were paired with the wrong generation label and id map, and zip
# silently dropped the surplus generation.
radius_slices = [
    all_feature_frames[gen_idx].map('cosine_similarity', inplace=False)[radius]
    for gen_idx in gen_indices
]
id_gen_maps = [gen_df.loc[gen_idx, 'id'].to_dict() for gen_idx in gen_indices]

for gen_idx, radius_frame, id_map in zip(gen_indices, radius_slices, id_gen_maps):
    gen_row = []
    gen_titles = []
    gen_ids = []

    for series in radius_frame.series:
        # Only the first instance is plotted; after the `[radius]` selection each
        # series is expected to hold exactly one.
        inst = series.instances[0]
        gen_row.append(inst.matrix)
        gen_titles.append(f"Gen {gen_idx}\n{series.label}")

        # Id pair for every matrix cell. A rank missing from the map falls back to
        # the rank itself (same convention as the other plotting cells) instead
        # of raising KeyError.
        n = inst.shape[0]
        ids_in_order = [id_map.get(position, position) for position in range(n)]
        id_pairs = [
            [(id_i, id_j) for id_j in ids_in_order]
            for id_i in ids_in_order
        ]
        gen_ids.append(id_pairs)

    data_2d.append(gen_row)
    titles_2d.append(gen_titles)
    global_ids_2d.append(gen_ids)

if interactive:
    plotter = ctk.BokehGridPlotter()
    plotter.add_thumbnails(ids=gen_df['id'], b64s=gen_df['image'])
    plotter.config.plot_width = cell_size
    plotter.config.plot_height = cell_size
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        global_ids_2d=global_ids_2d,
        titles_2d=titles_2d
    )
    plotter.transpose()
else:
    plotter = ctk.GridPlotter()
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        titles_2d=titles_2d
    )
plotter.show()