---

In [None]:
import json          
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3
import numpy as np
import sys
from pathlib import Path
from rich.console import Console
from bokeh.plotting import output_notebook

import canonical_toolkit as ctk

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Output folder of the run to analyse. When left falsy (None), the loading cell
# falls back to the last `output_folder` entry of run_history.csv.
DATA_FOLDER = None
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Route bokeh output to the notebook and create a rich console.
output_notebook()
console = Console()

# Make the repository root importable so the `ea` package resolves; the guard keeps
# sys.path from growing every time this cell is re-run.
project_root = str(Path.cwd().parent)
if project_root not in sys.path:
    sys.path.insert(0, project_root)
from ea.config import Config

# No folder selected explicitly: fall back to the most recent entry of the run history.
if not DATA_FOLDER:
    df = pd.read_csv('run_history.csv')
    DATA_FOLDER = df['output_folder'].iloc[-1]

print(DATA_FOLDER)

# sqlite3.connect() silently creates an empty database when the file is missing, which
# would surface later as a confusing "no such table" error, so fail early instead.
database_path = Path(DATA_FOLDER) / "database.db"
if not database_path.is_file():
    raise FileNotFoundError(f"No database found at {database_path}")

# Close the connection explicitly once the table is loaded; the frame is fully
# materialised in memory, so nothing needs the handle afterwards.
connection = sqlite3.connect(database_path)
try:
    data = pd.read_sql("SELECT * FROM individual", connection)
finally:
    connection.close()

config = Config.load(DATA_FOLDER)
config.large_description()

In [None]:
# Expand the JSON-encoded `tags_` column into one column per tag key.
tags_expanded = (
    data['tags_']
    .apply(lambda x: json.loads(x) if isinstance(x, str) else x)
    .apply(pd.Series)
)

# Drop tag columns left over from an earlier run of this cell before concatenating,
# otherwise re-running the cell yields duplicated column labels in `data`.
data = pd.concat(
    [data.drop(columns=tags_expanded.columns, errors='ignore'), tags_expanded],
    axis=1,
)

# Every generation an individual was alive in, birth and death both inclusive.
data['gen'] = [
    list(range(int(birth), int(death) + 1))
    for birth, death in zip(data['time_of_birth'], data['time_of_death'])
]

# One row per (individual, generation), ordered by generation and then by ctk_string.
gen_df = (
    data
    .explode('gen')
    .rename(columns={'fitness_': 'fitness', 'genotype_': 'genotype', 'tags_': 'tags'})
    .sort_values(['gen', 'ctk_string'], ascending=[True, True])
)

# Add rank within each generation (position in the sort order above) and index by it.
gen_df['rank'] = gen_df.groupby('gen').cumcount()
gen_df = gen_df.set_index(['gen', 'rank'])
gen_df.head()

---

### Plot fitness + 'stored values'

In [None]:
from ea.analysis.plot_metrics import plot_metrics

In [None]:
# Fitness is always plotted; novelty and speed only when the run configuration
# says they were stored.
to_plot = ['fitness']
to_plot += ['novelty'] if config.STORE_NOVELTY else []
to_plot += ['speed'] if config.STORE_SPEED else []

plot_metrics(
    gen_df,
    metrics=to_plot,
    is_max=config.IS_MAXIMISATION,
)

### Lifespan of the Fittest Individuals

In [None]:
from ea.analysis.plot_top_lifespans import plot_top_lifespans                                                                                                                               

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Parameters forwarded to plot_top_lifespans as `column=` and `top_x=`.
column = 'fitness'
top_x = 10
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Lifespan plot for the `top_x` individuals ranked on `column`.
plot_top_lifespans(
    gen_df,
    is_maximalisation=config.IS_MAXIMISATION,
    column=column,
    top_x=top_x,
)

In [None]:
from ea.analysis.plot_lifespan_analysis import plot_lifespan_analysis

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# (lower, upper) pair forwarded as `fitness_xlim` to plot_lifespan_analysis.
fitness_lim = (0, 0.01)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Lifespan analysis over the per-generation frame; `fitness_lim` is handed over as `fitness_xlim`.
plot_lifespan_analysis(gen_df, fitness_xlim=fitness_lim)

---

### Quick Dirty Robot Generation Plotter

In [None]:
from IPython.display import HTML                                                                                                                                           

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Generation whose individuals are rendered by the next cell. That cell advances the
# value by one (wrapping at config.NUM_GENERATIONS) every time it runs, so re-run this
# cell to start over from generation 0.
plot_generation = 0
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# One <img> thumbnail per individual of the selected generation, with the fitness
# (two decimals) as hover title.
thumbnails = [
    f'<img src="{row["image"]}" title="fit={row["fitness"]:.2f}" style="height:64px">'
    for _, row in gen_df.loc[plot_generation].iterrows()
]
html = "<div style='display:flex;flex-wrap:wrap;gap:4px'>" + "".join(thumbnails) + "</div>"

print(f'plotting generation {plot_generation}')
# Step to the next generation so that simply re-running this cell walks through the run.
plot_generation = (plot_generation + 1) % config.NUM_GENERATIONS
HTML(html)


In [None]:
from ea.analysis.high_res_robot_gens import high_res_robot_gens

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Parameters for the "fittest" high-resolution plot below:
#   amt         -> passed as `top_n`
#   gen_samples -> passed as `n_samples`
#   col_name    -> passed as `by`
amt = 2
gen_samples = 1
# alternatives: 'fitness' or 'novelty'
col_name = 'speed'
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

plot fittest

In [None]:
# High-resolution renders selected with a positive `top_n`
# (presumably the best individuals by `col_name` - confirm in high_res_robot_gens).
plotter = high_res_robot_gens(
    gen_df,
    config,
    top_n=amt,
    n_samples=gen_samples,
    by=col_name,
)
plotter.show()

plot least fit

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Parameters for the "least fit" high-resolution plot below:
#   amt         -> negated and passed as `top_n`
#   gen_samples -> passed as `n_samples`
#   col_name    -> passed as `by`
amt = 2
gen_samples = 1
# alternatives: 'fitness' or 'novelty'
col_name = 'speed'
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Same call as for the fittest individuals, but `top_n` is negated to request the
# least fit ones instead.
plotter = high_res_robot_gens(
    gen_df,
    config,
    top_n=-amt,
    n_samples=gen_samples,
    by=col_name,
)
plotter.show()

---

### Similarity Frame analysis

In [None]:
# # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# # evenly spaced out generations like before
# num_generations: int | None = 5 
# max_radius = None
# # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
def _generation_order(folder):
    """Sort key that orders ``gen_<N>`` folders by the integer N.

    Plain path sorting is lexicographic, which puts ``gen_10`` before ``gen_2`` unless
    the names are zero-padded. Folders without a numeric suffix are placed last,
    ordered by name.
    """
    suffix = folder.name.rsplit("_", 1)[-1]
    if suffix.isdecimal():
        return (0, int(suffix), folder.name)
    return (1, 0, folder.name)


# Position i in all_feature_frames is used as "generation i" by the cells below, so
# the folders have to be loaded in numeric generation order.
frame_folders = sorted(
    (config.OUTPUT_FOLDER / "feature_frames").glob("gen_*"),
    key=_generation_order,
)
all_feature_frames = [ctk.SimilarityFrame.load(f) for f in frame_folders]

### Heatmaps

Cosine-similarity heatmaps per radius across all spaces, shown for a single generation.

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Generation to inspect: used as the position in all_feature_frames and as the `gen`
# index value of gen_df in the cells below.
generation = 0
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Apply the 'cosine_similarity' mapping to the selected generation's frame and keep the
# returned result (inplace=False; presumably leaves the stored frame untouched - confirm).
heat_map_show = all_feature_frames[generation].map('cosine_similarity', inplace=False)

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# interactive: True -> ctk.BokehGridPlotter, False -> ctk.GridPlotter (see the plotting cell).
interactive = True
# cell_size: assigned to plotter.config.plot_width / plot_height in the interactive branch.
cell_size = 150
# Select what area/ to plot with slicing
# NOTE(review): this rebinds heat_map_show to a slice of itself, so re-running this cell
# applies the slice again on the already sliced frame. Re-run the cell that builds
# heat_map_show first to get back to a known state.
heat_map_show = heat_map_show[:,::-1] 

# [OPTIONAL] Show the cumulative sum across radii
# NOTE(review): with inplace=False the result presumably has to be assigned back to
# heat_map_show to have any effect - confirm before enabling.
# heat_map_show.to_cumulative(inplace=False)

# [OPTIONAL] Show what happens when aggregating
# heat_map_show = ctk.SimilarityFrame([heat_map_show.aggregate()])
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Text summary of the frame that is about to be plotted.
print(heat_map_show)

In [None]:
# Grid layout is [series][instance]: one row per series of heat_map_show and one cell
# per instance of that series.
data_2d = [[instance.matrix for instance in series.instances] for series in heat_map_show.series]

# rank -> individual id for the inspected generation; matrix positions are looked up here.
id_map = gen_df.loc[generation, 'id'].to_dict()

titles = [
    [f"{series.label} - Radius {instance.radius}" for instance in series.instances]
    for series in heat_map_show.series
]

# For every matrix cell (row, col) the pair of individual ids it compares.
real_ids = [
    [
        [
            [(id_map[row], id_map[col]) for col in range(instance.shape[1])]
            for row in range(instance.shape[0])
        ]
        for instance in series.instances
    ]
    for series in heat_map_show.series
]

if not interactive:
    # Static grid: only the matrices and their titles are needed.
    plotter = ctk.GridPlotter()
    plotter.add_2D_numeric_data(data_2d=data_2d, titles_2d=titles)
else:
    # Bokeh grid with thumbnails registered per individual id.
    plotter = ctk.BokehGridPlotter()
    plotter.add_thumbnails(ids=gen_df['id'], b64s=gen_df['image'])
    plotter.config.plot_width = cell_size
    plotter.config.plot_height = cell_size
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        global_ids_2d=real_ids,
        titles_2d=titles,
    )
    plotter.transpose()
plotter.show()

Cosine-similarity heatmaps across generations, shown for a single radius.

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# evenly spaced out generations like before
# num_generations: number of sample points np.linspace spreads between generation 0
# and config.NUM_GENERATIONS in the cells below.
num_generations: int | None = 5 
# radius: the single radius to show for every sampled generation.
radius = 3
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Never index past the last loaded frame, even if config.NUM_GENERATIONS is larger
# than the number of frames found on disk.
last_generation = min(config.NUM_GENERATIONS, len(all_feature_frames) - 1)

if num_generations is None:
    # `num_generations` is annotated as optional; None is treated as "every generation"
    # (np.linspace itself rejects num=None).
    sampled_generations = np.arange(last_generation + 1)
else:
    # Evenly spaced generation indices, first and last generation included.
    sampled_generations = np.linspace(0, last_generation, num=num_generations, dtype=int)

# Cosine-similarity mapping of every sampled generation's frame.
cosine_generation_frames = [
    all_feature_frames[i].map('cosine_similarity', inplace=False)
    for i in sampled_generations
]

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# interactive: True -> ctk.BokehGridPlotter, False -> ctk.GridPlotter (see the plotting cell).
interactive = True
# cell_size: assigned to plotter.config.plot_width / plot_height in the interactive branch.
cell_size = 150
# Select what area/ to plot with slicing
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Process all generations
# Never index past the last loaded frame, even if config.NUM_GENERATIONS is larger
# than the number of frames found on disk.
last_generation = min(config.NUM_GENERATIONS, len(all_feature_frames) - 1)
if num_generations is None:
    # `num_generations` is annotated as optional; None is treated as "every generation"
    # (np.linspace itself rejects num=None).
    gen_indices = np.arange(last_generation + 1)
else:
    gen_indices = np.linspace(0, last_generation, num=num_generations, dtype=int)

# Use the radius chosen in the parameter cell instead of a second hard-coded value.
target_r = radius

# Chain: map cosine similarity, then pick the requested radius for each series, falling
# back to the series' largest radius when it has no radius that large.
sliced_frames = [
    all_feature_frames[i].map('cosine_similarity', inplace=False).map(
        lambda s: s[min(target_r, max(s.radii))]
    )
    for i in gen_indices
]

In [None]:
# Labels of the spaces (series) to show, one grid row each.
# NOTE(review): taken from the first sampled frame and assumed identical for every
# generation; also assumes a sliced frame can be indexed by series label - confirm.
spaces = (
    [serie.label for serie in all_feature_frames[gen_indices[0]].series]
    if len(gen_indices) else []
)

# rank -> individual id per sampled generation (same lookup the single-generation
# heatmap cell builds for one generation).
id_maps = {int(g): gen_df.loc[int(g), 'id'].to_dict() for g in gen_indices}

# Grid [Space][Generation]
# f[s] returns a Matrix (the result of the lambda slice)
data_2d = [[f[s].matrix for f in sliced_frames] for s in spaces]

# Extract titles and actual radii used. Frames and generation indices are walked in
# lockstep so every title reports the radius of its own frame.
titles = [
    [f"{s} (Gen {g}) - R{f[s].radius}" for f, g in zip(sliced_frames, gen_indices)]
    for s in spaces
]

# Safe ID mapping: positions without a known individual id are skipped.
real_ids = [
    [
        [[(id_maps[g][i], id_maps[g][j]) for j in range(f[s].shape[1]) if j in id_maps[g]]
          for i in range(f[s].shape[0]) if i in id_maps[g]]
        for f, g in zip(sliced_frames, gen_indices)
    ] for s in spaces
]
if interactive:
    # Bokeh grid with thumbnails registered per individual id.
    plotter = ctk.BokehGridPlotter()
    plotter.add_thumbnails(ids=gen_df['id'], b64s=gen_df['image'])
    plotter.config.plot_width = cell_size
    plotter.config.plot_height = cell_size
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        global_ids_2d=real_ids,
        titles_2d=titles
    )
    plotter.transpose()
else:
    # Static grid: only the matrices and their titles are needed.
    plotter = ctk.GridPlotter()
    plotter.add_2D_numeric_data(
        data_2d=data_2d,
        titles_2d=titles
    )
plotter.show()

radius cosine heatmap across GENERATIONS. show 1 SPACE

### UMAP

In [None]:
#TODO I want to test the differences between the 2
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Settings for the UMAP embedding step.
# NOTE(review): metric='precomputed' presumably means the embedding is fed pairwise
# matrices rather than raw feature vectors - confirm against ctk.UmapConfig.
umap_config = ctk.UmapConfig(
    n_neighbors=2,
    metric='precomputed',
    n_jobs=-1
)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Cosine-similarity frame for every generation.
# NOTE(review): no other cell defines all_cosine_frames, so it is built here from
# all_feature_frames - confirm this is the intended input for the embedding.
all_cosine_frames = [frame.map('cosine_similarity', inplace=False) for frame in all_feature_frames]

# Embed with the UMAP settings from the cell above; `config` is the EA run
# configuration, not a UMAP configuration.
all_umap_frames = [
    frame.map('umap_embed', config=umap_config, inplace=False)
    for frame in all_cosine_frames
]

In [None]:
# Placeholder for the UMAP plots. Each branch needs at least one statement, otherwise
# the cell is a SyntaxError and stops "Run All".
if interactive:
    # bokeh plotter
    pass  # TODO: plot all_umap_frames with the interactive bokeh plotter
else:
    # grid plotter
    pass  # TODO: plot all_umap_frames with the static grid plotter
