In [None]:
import json          
import pandas as pd
import matplotlib.pyplot as plt
import sqlite3
import numpy as np
import sys
from pathlib import Path
from rich.console import Console                                                                                                                                        
import warnings
import canonical_toolkit as ctk
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# User setting: path of the run folder to analyse.
# When left falsy (None), the setup cell falls back to run_history.csv in the
# parent of the working directory, and otherwise walks up the directory tree
# looking for config.py.
DATA_FOLDER = None
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Resolve the run folder, make the `ea` package importable, then load the
# individuals table and the run configuration.
#
# Produces: console, base_folder, data_path, ea_folder, data, config.

# Silence the "n_jobs value ... overridden ..." warning.
warnings.filterwarnings("ignore", message="n_jobs value.*overridden.*")
console = Console()

# Step out of analysis/ folder first
base_folder = Path.cwd().parent

# 1. Check if DATA_FOLDER is already set
if DATA_FOLDER:
    data_path = Path(DATA_FOLDER)
    ea_folder = data_path.parent.parent  # __data__/run_xxx -> ea/

# 2. Try to find run_history.csv in parent (ea/ folder)
elif (base_folder / "run_history.csv").exists():
    df = pd.read_csv(base_folder / "run_history.csv")
    # The last row of the history is used as the run to analyse.
    data_path = base_folder / df["output_folder"].iloc[-1]
    ea_folder = base_folder

# 3. Assume notebook is inside output_folder/analysis/, walk up to find config.py
else:
    ea_folder = base_folder
    # The loop stops at the filesystem root (where a path equals its own
    # parent); the `else` branch runs only when no `break` happened.
    while ea_folder != ea_folder.parent:
        if (ea_folder / "config.py").exists():
            break
        ea_folder = ea_folder.parent
    else:
        raise FileNotFoundError("Could not find ea/config.py in any parent directory")
    data_path = base_folder  # output_folder is parent of analysis/

# The import below needs the *parent* of the ea folder on sys.path, so it has
# to come after the folder resolution rather than in the import cell.
# NOTE(review): this assumes the resolved folder is literally named "ea".
sys.path.insert(0, str(ea_folder.parent))
from ea.config import Config

print(data_path)

# sqlite3.connect() silently creates an empty database when the file is
# missing, which would surface later as a confusing "no such table" error.
db_path = data_path / "database.db"
if not db_path.exists():
    raise FileNotFoundError(f"No database.db found in {data_path}")

# Close the connection explicitly once the table has been read so the
# notebook kernel does not keep the database file open.
connection = sqlite3.connect(db_path)
try:
    data = pd.read_sql("SELECT * FROM individual", connection)
finally:
    connection.close()

config = Config.load(data_path)
config.large_description()

In [None]:
# Load the similarity archive from the feature frames folder and the database
# file, both addressed relative to config.OUTPUT_FOLDER.
# NOTE(review): these paths come from config.OUTPUT_FOLDER, while the setup cell
# reads database.db from data_path — confirm both point at the same run.
archive = ctk.SimilarityArchive.load(
    frame_folder_path=f"{config.OUTPUT_FOLDER}/feature_frames",
    db_file_path=f"{config.OUTPUT_FOLDER}/database.db"
)

In [None]:
def _decode_tags(raw):
    """Decode a JSON string; values that are not strings pass through unchanged."""
    if isinstance(raw, str):
        return json.loads(raw)
    return raw


def _generations_alive(row):
    """List every generation from time_of_birth to time_of_death, both inclusive."""
    first = int(row['time_of_birth'])
    last = int(row['time_of_death'])
    return list(range(first, last + 1))


# Spread the decoded tags into one column per key and append them to `data`.
# Re-running this cell appends the tag columns a second time.
decoded_tags = data['tags_'].apply(_decode_tags)
tags_expanded = decoded_tags.apply(pd.Series)
data = pd.concat([data, tags_expanded], axis=1)

# One list of generation numbers per individual; exploded into rows later.
data['gen'] = data.apply(_generations_alive, axis=1)

In [None]:
# Long format: one row per (individual, generation), ordered by generation and
# then by ctk_string; `rank` is the row's position inside its generation.
column_renames = {'fitness_': 'fitness', 'genotype_': 'genotype', 'tags_': 'tags'}
per_generation = data.explode('gen').rename(columns=column_renames)
per_generation = per_generation.sort_values(['gen', 'ctk_string'], ascending=[True, True])
per_generation['rank'] = per_generation.groupby('gen').cumcount()
gen_df = per_generation.set_index(['gen', 'rank'])

# Keep only the rows whose individual dies after the row's generation.
flat_gen_df = gen_df.reset_index()
outlives_generation = flat_gen_df['time_of_death'] > flat_gen_df['gen']
survivors_df = flat_gen_df[outlives_generation].set_index(['gen', 'rank'])

In [None]:
# Preview the first rows of the survivors table (indexed by gen and rank).
survivors_df.head()

In [None]:
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# User setting: generation to plot, defaulting to the last index
# (NUM_GENERATIONS - 1).
# NOTE(review): no cell visible in this part of the notebook reads
# plot_generation — confirm it is still needed.
plot_generation = config.NUM_GENERATIONS-1
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In [None]:
# Image grid of the `amt` fittest survivors (rows) at `n_samples` evenly
# spaced generations (columns).
amt = 3
n_samples = 5
sample_gens = np.linspace(0, config.NUM_GENERATIONS - 1, n_samples, dtype=int)
# Best-first ordering: descending fitness when maximising, ascending otherwise.
ascending = not config.IS_MAXIMISATION

# Rank each sampled generation once instead of re-sorting it for every row.
ranked_per_gen = [
    survivors_df.loc[g].sort_values('fitness', ascending=ascending)
    for g in sample_gens
]

# grid[i][j] is the image of the i-th fittest survivor in the j-th sampled
# generation. `.iloc[i]` raises IndexError when a generation has fewer than
# `amt` survivors.
grid = [
    [
        ctk.quick_view(
            ctk.node_from_string(ranked.iloc[i].ctk_string).to_graph(),
            return_img=True,
            white_background=True,
        )
        for ranked in ranked_per_gen
    ]
    for i in range(amt)
]

plotter = ctk.GridPlotter(n_rows=amt, n_cols=n_samples)
plotter.add_2D_image_data(grid)
# Label the top row with the generation each column shows. A plain loop avoids
# building and echoing a throwaway list of return values.
for col, g in enumerate(sample_gens):
    plotter[0, col].set_title(f"gen: {g}")
plotter.show()

In [None]:
# Render the (up to) `amt` fittest survivors of the last generation and save
# the grid as an image.
amt = 100

last_gen = config.NUM_GENERATIONS - 1
# Best-first ordering: descending fitness when maximising, ascending otherwise.
ascending = not config.IS_MAXIMISATION
last_gen_df = survivors_df.loc[last_gen].sort_values('fitness', ascending=ascending).head(amt)

# Iterate over the rows that actually exist: head(amt) may return fewer than
# `amt` rows, and indexing 0..amt-1 would then raise IndexError.
images = [
    ctk.quick_view(
        ctk.node_from_string(ctk_string).to_graph(),
        return_img=True,
        white_background=True,
    )
    for ctk_string in last_gen_df['ctk_string']
]

plotter = ctk.GridPlotter()
# NOTE(review): the (5, 20) shape is sized for exactly 100 images — confirm how
# add_image_data behaves when fewer are supplied.
plotter.add_image_data(images, shape=(5,20))
plotter.show()
plotter.save('100_fittest.png')

In [None]:
# Archive view produced by `alive_only`, requested as a new object
# (inplace=False); every later section starts from `alive`.
alive = archive.alive_only(inplace=False)


### HEATMAPS

1 GEN (START), all r, all spaces

In [None]:
# Apply 'cosine_similarity' across the alive archive, requesting a new object
# (inplace=False) rather than modifying `alive`.
cosine = alive.map('cosine_similarity', inplace=False)

In [None]:
# Select index 0 of the cosine archive.
# NOTE(review): per the heading this is the first generation, but the
# three-index selections elsewhere in this notebook put the stepped
# ("evolution") axis last — confirm which axis a single index addresses.
heatmap_plot = cosine[0]

# Hand the selection's 2-D data and titles to a grid plotter ("Blues" colormap).
heatmap_plotter = ctk.GridPlotter()
heatmap_plotter.add_2D_numeric_data(
    data_2d=heatmap_plot.get_2d_data(), 
    titles_2d=heatmap_plot.get_2d_titles(),
    cmap="Blues"
)
# Panel spacing, a global colorbar on the right, then transpose the grid.
heatmap_plotter.config.col_space = 0.05
heatmap_plotter.config.row_space = 0.2
heatmap_plotter.add_global_colorbar("right")
heatmap_plotter.transpose()
heatmap_plotter.show()

1 GEN (end), all r, all spaces

In [None]:
# Select index NUM_GENERATIONS - 1 of the cosine archive.
# NOTE(review): the other "end" cells in this notebook index with [-1] —
# confirm both forms address the same entry.
heatmap_plot = cosine[config.NUM_GENERATIONS-1]


# Hand the selection's 2-D data and titles to a grid plotter ("Blues" colormap).
heatmap_plotter = ctk.GridPlotter()
heatmap_plotter.add_2D_numeric_data(
    data_2d=heatmap_plot.get_2d_data(), 
    titles_2d=heatmap_plot.get_2d_titles(),
    cmap="Blues"
)
# Panel spacing, a global colorbar on the right, then transpose the grid.
heatmap_plotter.config.col_space = 0.05
heatmap_plotter.config.row_space = 0.2
heatmap_plotter.add_global_colorbar("right")
heatmap_plotter.transpose()
heatmap_plotter.show()

1 RADIUS [pick], all spaces, show evolution

In [None]:
# Index 2 on the first axis, everything on the second, every 5th entry on the
# third. Per the heading these are presumably radius, space and generation —
# TODO confirm.
heatmap_plot = cosine[2, :, ::5]


# Hand the selection's 2-D data and titles to a grid plotter ("Blues" colormap).
heatmap_plotter = ctk.GridPlotter()
heatmap_plotter.add_2D_numeric_data(
    data_2d=heatmap_plot.get_2d_data(), 
    titles_2d=heatmap_plot.get_2d_titles(),
    cmap="Blues"
)
# Panel spacing, a global colorbar on the right, then transpose the grid.
heatmap_plotter.config.col_space = 0.05
heatmap_plotter.config.row_space = 0.2
heatmap_plotter.add_global_colorbar("right")
heatmap_plotter.transpose()
heatmap_plotter.show()

### r [CUMUL]

In [None]:
# Apply 'to_cumulative' to the cosine archive, requesting a new object
# (inplace=False). Per the section heading the accumulation is presumably over
# the radius axis — TODO confirm.
cumul_cosine = cosine.map('to_cumulative', inplace=False)

1 GEN (start), all r [CUMUL], all spaces

In [None]:
# Select index 0 of the cumulative cosine archive (the first generation, per
# the heading — TODO confirm which axis a single index addresses).
heatmap_plot = cumul_cosine[0]


# Hand the selection's 2-D data and titles to a grid plotter ("Blues" colormap).
heatmap_plotter = ctk.GridPlotter()
heatmap_plotter.add_2D_numeric_data(
    data_2d=heatmap_plot.get_2d_data(), 
    titles_2d=heatmap_plot.get_2d_titles(),
    cmap="Blues"
)
# Panel spacing, a global colorbar on the right, then transpose the grid.
heatmap_plotter.config.col_space = 0.05
heatmap_plotter.config.row_space = 0.2
heatmap_plotter.add_global_colorbar("right")
heatmap_plotter.transpose()
heatmap_plotter.show()

1 GEN (end), all r [CUMUL], all spaces

In [None]:
# Select the last index of the cumulative cosine archive (the final
# generation, per the heading — TODO confirm which axis a single index
# addresses).
heatmap_plot = cumul_cosine[-1]


# Hand the selection's 2-D data and titles to a grid plotter ("Blues" colormap).
heatmap_plotter = ctk.GridPlotter()
heatmap_plotter.add_2D_numeric_data(
    data_2d=heatmap_plot.get_2d_data(), 
    titles_2d=heatmap_plot.get_2d_titles(),
    cmap="Blues"
)
# Panel spacing, a global colorbar on the right, then transpose the grid.
heatmap_plotter.config.col_space = 0.05
heatmap_plotter.config.row_space = 0.2
heatmap_plotter.add_global_colorbar("right")
heatmap_plotter.transpose()
heatmap_plotter.show()

1 RADIUS [CUMUL] [pick], all spaces, show evolution

In [None]:
# Index 2 on the first axis, everything on the second, every 5th entry on the
# third. Per the heading these are presumably radius, space and generation —
# TODO confirm.
heatmap_plot = cumul_cosine[2, :, ::5]

# Hand the selection's 2-D data and titles to a grid plotter ("Blues" colormap).
heatmap_plotter = ctk.GridPlotter()
heatmap_plotter.add_2D_numeric_data(
    data_2d=heatmap_plot.get_2d_data(), 
    titles_2d=heatmap_plot.get_2d_titles(),
    cmap="Blues"
)
# Panel spacing, a global colorbar on the right, then transpose the grid.
heatmap_plotter.config.col_space = 0.05
heatmap_plotter.config.row_space = 0.2
heatmap_plotter.add_global_colorbar("right")
heatmap_plotter.transpose()
heatmap_plotter.show()

### r [CUMUL] space [CUMUL]

In [None]:
# Apply 'to_cumulative' a second time, to the already cumulative archive,
# again requesting a new object (inplace=False).
# NOTE(review): per the section heading the second pass is meant to accumulate
# over spaces — confirm that a repeated 'to_cumulative' targets a different axis.
cumul_cumul_cosine = cumul_cosine.map('to_cumulative', inplace=False)

In [None]:
# Everything on the first two axes, every 5th entry on the third.
heatmap_plot = cumul_cumul_cosine[:, :, ::5]

# Hand the selection's 2-D data and titles to a grid plotter ("Blues" colormap).
heatmap_plotter = ctk.GridPlotter()
heatmap_plotter.add_2D_numeric_data(
    data_2d=heatmap_plot.get_2d_data(), 
    titles_2d=heatmap_plot.get_2d_titles(),
    cmap="Blues"
)
# Panel spacing, a global colorbar on the right, then transpose the grid.
heatmap_plotter.config.col_space = 0.05
heatmap_plotter.config.row_space = 0.2
heatmap_plotter.add_global_colorbar("right")
heatmap_plotter.transpose()
heatmap_plotter.show()

---

### UMAP space embeddings

In [None]:
# Colour and size prep for the scatter dots.
#
# Produces dot_ids / dot_colors / dot_sizes, parallel lists with one entry per
# survivors_df row, consumed by the add_id_styling calls in the plotting cells.

df_all = survivors_df.reset_index()

# Map fitness linearly onto RdYlGn (red = lowest fitness, green = highest).
# NOTE(review): for a minimisation run green therefore marks the *worst*
# individuals — confirm whether the colormap should be reversed in that case.
norm = mcolors.Normalize(vmin=df_all.fitness.min(), vmax=df_all.fitness.max())
cmap = plt.get_cmap('RdYlGn')
dot_ids = df_all.id.tolist()
dot_colors = [mcolors.to_hex(cmap(norm(f))) for f in df_all.fitness]

# df_all holds one row per (individual, generation), so rank unique
# individuals; otherwise one long-lived individual can occupy all five slots.
# "Best" follows the run's optimisation direction, like the fittest-individual
# cells: descending fitness when maximising, ascending when minimising.
best_first = df_all.drop_duplicates('id').sort_values(
    'fitness', ascending=not config.IS_MAXIMISATION
)
top_5_ids = set(best_first.head(5).id)

# The five best individuals are drawn larger than the rest.
dot_sizes = [9 if i in top_5_ids else 4 for i in dot_ids]

In [None]:
# Build the transformer grid for the unaggregated archive.
# `alive.replace()` is presumably a copy of `alive` — TODO confirm.
to_fit = alive.replace()

# The grid is sized from the shape of the frame grabbed at index 0.
grid = ctk.TransformerGrid(to_fit[0].grab_frame().shape)
# NOTE(review): unlike the later sections, random_state / transform_seed /
# n_jobs are left at UmapConfig's defaults here — confirm that is intended.
umap = ctk.UmapConfig(
    n_neighbors=5,
).get_umap()
print(umap)
# Assign the UMAP object to every cell of the grid.
# NOTE(review): confirm the grid copies it per cell rather than sharing one instance.
grid[:,:] = umap

In [None]:
# Fit the transformer grid on the `new_only` subset of the archive, requested
# as a new object (inplace=False).
# Presumably that subset holds only individuals new to each generation — TODO confirm.
subset = to_fit.new_only(inplace=False)
subset.fit_grid(grid)

In [None]:
# Run the fitted grid over a selection of the alive archive.
# NOTE(review): the last axis is fixed to index 0 here, whereas the later
# sections use `::5` on that axis. The "end" and "show evolution" cells that
# read this `to_embed` may therefore only see that single entry — confirm.
to_embed = alive.replace()[:, :, 0]
to_embed.transform_grid(grid)

1 GEN (START), all r, all spaces

In [None]:
# Select index 0 of the embedded archive (the first generation, per the
# heading — TODO confirm which axis a single index addresses).
to_plot_embed = to_embed[0]


# Hand the selection's 2-D data, titles and ids to a grid plotter.
umap_plotter = ctk.GridPlotter()
umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(),
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids(),
)
# Per-id colours and sizes come from the colour-prep cell; alpha is 0.5 for all.
umap_plotter.add_id_styling(ids=dot_ids, colors=dot_colors, sizes=dot_sizes, alphas=[0.5]*len(dot_ids))
umap_plotter.config.col_space = 0
umap_plotter.config.row_space = 0.2
umap_plotter.transpose()
umap_plotter.set_global_axis_limits(padding=0.1)

# Blank the tick labels on the [:-1, :] and [:, 1:] panel selections
# (presumably all rows but the last and all columns but the first).
# The original issued the y-tick call twice; once is enough.
umap_plotter[:-1, :].set_xticklabels([])
umap_plotter[:, 1:].set_yticklabels([])
umap_plotter.show()

1 GEN (end), all r, all spaces

In [None]:
# Select the last index of the embedded archive (the final generation, per the
# heading — TODO confirm which axis a single index addresses).
to_plot_embed = to_embed[-1]


# Hand the selection's 2-D data, titles and ids to a grid plotter.
umap_plotter = ctk.GridPlotter()
umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(),
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids(),
)
# Per-id colours and sizes come from the colour-prep cell; alpha is 0.5 for all.
umap_plotter.add_id_styling(ids=dot_ids, colors=dot_colors, sizes=dot_sizes, alphas=[0.5]*len(dot_ids))
umap_plotter.config.col_space = 0
umap_plotter.config.row_space = 0.2
umap_plotter.transpose()
umap_plotter.set_global_axis_limits(padding=0.1)

# Blank the tick labels on the [:-1, :] and [:, 1:] panel selections
# (presumably all rows but the last and all columns but the first).
# The original issued the y-tick call twice; once is enough.
umap_plotter[:-1, :].set_xticklabels([])
umap_plotter[:, 1:].set_yticklabels([])
umap_plotter.show()

1 RADIUS [pick], all spaces, show evolution

In [None]:
# Index 2 on the first axis, everything on the other two. Per the heading
# these are presumably radius, space and generation — TODO confirm.
to_plot_embed = to_embed[2, :, :]


# Hand the selection's 2-D data, titles and ids to a grid plotter.
umap_plotter = ctk.GridPlotter()
umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(), 
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids(),
)
# Per-id colours and sizes come from the colour-prep cell; alpha is 0.5 for all.
umap_plotter.add_id_styling(ids=dot_ids, colors=dot_colors, sizes=dot_sizes, alphas=[0.5]*len(dot_ids))
umap_plotter.config.col_space = 0
umap_plotter.config.row_space = 0.2
umap_plotter.transpose()
umap_plotter.set_global_axis_limits(padding=0.1)

# Tick-label blanking is disabled for this figure.
# umap_plotter[:-1, :].set_xticklabels([])
# umap_plotter[:, 1:].set_yticklabels([])
# umap_plotter[:, 1:].set_yticklabels([])
umap_plotter.show()

### r [AGG]

In [None]:
# Build the transformer grid for the 'to_cumulative' version of the archive.
# inplace=False requests a new object rather than modifying `alive`.
to_fit_ragg = alive.map('to_cumulative', inplace=False)

# The grid is sized from the shape of the frame grabbed at index 0.
grid = ctk.TransformerGrid(to_fit_ragg[0].grab_frame().shape)
# With no random_state or transform_seed the embedding is unseeded, so layouts
# will presumably differ from run to run — TODO confirm.
umap = ctk.UmapConfig(
    n_neighbors=5,
    random_state=None,
    transform_seed=None,
    n_jobs=-1
).get_umap()
print(umap)
# Assign the UMAP object to every cell of the grid.
# NOTE(review): confirm the grid copies it per cell rather than sharing one instance.
grid[:,:] = umap

In [None]:
# Fit the transformer grid on the `new_only` subset of the cumulative archive,
# requested as a new object (inplace=False).
# Presumably that subset holds only individuals new to each generation — TODO confirm.
subset = to_fit_ragg.new_only(inplace=False)
subset.fit_grid(grid)

In [None]:
# Keep every 5th entry on the last axis (presumably generations — TODO
# confirm) and run the fitted grid over that selection.
to_embed = to_fit_ragg[:, :, ::5]
to_embed.transform_grid(grid)

1 GEN (start), all r [AGG], all spaces

In [None]:
# Select index 0 of the embedded cumulative archive (the first generation, per
# the heading — TODO confirm which axis a single index addresses).
to_plot_embed = to_embed[0]


# Hand the selection's 2-D data, titles and ids to a grid plotter.
umap_plotter = ctk.GridPlotter()
umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(),
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids(),
)
# Per-id colours and sizes come from the colour-prep cell; alpha is 0.5 for all.
umap_plotter.add_id_styling(ids=dot_ids, colors=dot_colors, sizes=dot_sizes, alphas=[0.5]*len(dot_ids))
umap_plotter.config.col_space = 0
umap_plotter.config.row_space = 0.2
umap_plotter.transpose()
umap_plotter.set_global_axis_limits(padding=0.1)

# Blank the tick labels on the [:-1, :] and [:, 1:] panel selections
# (presumably all rows but the last and all columns but the first).
# The original issued the y-tick call twice; once is enough.
umap_plotter[:-1, :].set_xticklabels([])
umap_plotter[:, 1:].set_yticklabels([])
umap_plotter.show()

1 GEN (end), all r [AGG], all spaces

In [None]:
# Select the last index of the embedded cumulative archive (the final
# generation, per the heading — TODO confirm which axis a single index
# addresses).
to_plot_embed = to_embed[-1]


# Hand the selection's 2-D data, titles and ids to a grid plotter.
umap_plotter = ctk.GridPlotter()
umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(),
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids(),
)
# Per-id colours and sizes come from the colour-prep cell; alpha is 0.5 for all.
umap_plotter.add_id_styling(ids=dot_ids, colors=dot_colors, sizes=dot_sizes, alphas=[0.5]*len(dot_ids))
umap_plotter.config.col_space = 0
umap_plotter.config.row_space = 0.2
umap_plotter.transpose()
umap_plotter.set_global_axis_limits(padding=0.1)

# Blank the tick labels on the [:-1, :] and [:, 1:] panel selections
# (presumably all rows but the last and all columns but the first).
# The original issued the y-tick call twice; once is enough.
umap_plotter[:-1, :].set_xticklabels([])
umap_plotter[:, 1:].set_yticklabels([])
umap_plotter.show()

1 r [AGG] [pick], all spaces, show evolution

In [None]:
# Index 1 on the first axis, everything on the second, every 5th entry on the
# third.
# NOTE(review): `to_embed` was already built with `::5` on the last axis, so
# this second `::5` presumably thins the sequence further — confirm intended.
to_plot_embed = to_embed[1, :, ::5]


# Hand the selection's 2-D data, titles and ids to a grid plotter.
umap_plotter = ctk.GridPlotter()
umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(), 
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids(),
)
# Per-id colours and sizes come from the colour-prep cell; alpha is 0.5 for all.
umap_plotter.add_id_styling(ids=dot_ids, colors=dot_colors, sizes=dot_sizes, alphas=[0.5]*len(dot_ids))
umap_plotter.config.col_space = 0
umap_plotter.config.row_space = 0.2
umap_plotter.transpose()
umap_plotter.set_global_axis_limits(padding=0.1)

# Tick-label blanking is disabled for this figure.
# umap_plotter[:-1, :].set_xticklabels([])
# umap_plotter[:, 1:].set_yticklabels([])
# umap_plotter[:, 1:].set_yticklabels([])
umap_plotter.show()

### [AGG] spaces

In [None]:
# Build the transformer grid for the 'aggregate' version of the archive.
# `alive.replace()` is presumably a copy, so the map does not touch `alive`
# itself — TODO confirm.
to_fit_sagg = alive.replace().map('aggregate')[:, :, :]

# The grid is sized from the shape of the frame grabbed at index 0.
grid = ctk.TransformerGrid(to_fit_sagg[0].grab_frame().shape)
# With no random_state or transform_seed the embedding is unseeded, so layouts
# will presumably differ from run to run — TODO confirm.
umap = ctk.UmapConfig(
    n_neighbors=5,
    random_state=None,
    transform_seed=None,
    n_jobs=-1
).get_umap()
print(umap)
# Assign the UMAP object to every cell of the grid.
# NOTE(review): confirm the grid copies it per cell rather than sharing one instance.
grid[:,:] = umap

print(to_fit_sagg)

In [None]:
# Fit the transformer grid on the `new_only` subset of the aggregated archive,
# requested as a new object (inplace=False).
# Presumably that subset holds only individuals new to each generation — TODO confirm.
subset = to_fit_sagg.new_only(inplace=False)
subset.fit_grid(grid)

In [None]:
# Keep every 5th entry on the last axis (presumably generations — TODO
# confirm) and run the fitted grid over that selection.
to_embed = to_fit_sagg[:, :, ::5]
to_embed.transform_grid(grid)

1 GEN (start), all r, 1 [AGG] space

In [None]:
# Select index 0 of the embedded aggregated archive (the first generation, per
# the heading — TODO confirm which axis a single index addresses).
to_plot_embed = to_embed[0]


# Hand the selection's 2-D data, titles and ids to a grid plotter.
umap_plotter = ctk.GridPlotter()
umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(), 
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids(),
)
# Per-id colours and sizes come from the colour-prep cell; alpha is 0.5 for all.
umap_plotter.add_id_styling(ids=dot_ids, colors=dot_colors, sizes=dot_sizes, alphas=[0.5]*len(dot_ids))
umap_plotter.config.col_space = 0
umap_plotter.config.row_space = 0.2
umap_plotter.transpose()
umap_plotter.set_global_axis_limits(padding=0.1)

# Tick-label blanking is disabled for this figure.
# umap_plotter[:-1, :].set_xticklabels([])
# umap_plotter[:, 1:].set_yticklabels([])
# umap_plotter[:, 1:].set_yticklabels([])
umap_plotter.show()

1 GEN (end), all r, 1 [AGG] space

In [None]:
# Select the last index of the embedded aggregated archive (presumably the
# final generation kept in `to_embed` — TODO confirm which axis a single index
# addresses).
to_plot_embed = to_embed[-1]

# Hand the selection's 2-D data, titles and ids to a grid plotter.
umap_plotter = ctk.GridPlotter()
umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(), 
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids(),
)
# Per-id colours and sizes come from the colour-prep cell; alpha is 0.5 for all.
umap_plotter.add_id_styling(ids=dot_ids, colors=dot_colors, sizes=dot_sizes, alphas=[0.5]*len(dot_ids))
umap_plotter.config.col_space = 0
umap_plotter.config.row_space = 0.2
umap_plotter.transpose()
umap_plotter.set_global_axis_limits(padding=0.1)

# Tick-label blanking is disabled for this figure.
# umap_plotter[:-1, :].set_xticklabels([])
# umap_plotter[:, 1:].set_yticklabels([])
# umap_plotter[:, 1:].set_yticklabels([])
umap_plotter.show()

all r, 1 space [agg], show evolution

In [None]:
# Everything on the first axis, index 0 on the second, every 5th entry on the
# third.
# NOTE(review): `to_embed` was already built with `::5` on the last axis, so
# this second `::5` presumably thins the sequence further — confirm intended.
to_plot_embed = to_embed[:, 0, ::5]

# Hand the selection's 2-D data, titles and ids to a grid plotter.
umap_plotter = ctk.GridPlotter()
umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(), 
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids(),
)
# Per-id colours and sizes come from the colour-prep cell; alpha is 0.5 for all.
umap_plotter.add_id_styling(ids=dot_ids, colors=dot_colors, sizes=dot_sizes, alphas=[0.5]*len(dot_ids))
umap_plotter.config.col_space = 0
umap_plotter.config.row_space = 0.2
umap_plotter.transpose()
umap_plotter.set_global_axis_limits(padding=0.1)

# Tick-label blanking is disabled for this figure.
# umap_plotter[:-1, :].set_xticklabels([])
# umap_plotter[:, 1:].set_yticklabels([])
# umap_plotter[:, 1:].set_yticklabels([])
umap_plotter.show()

In [None]:
# Rebuild the transformer grid for the 'to_cumulative' version of the archive.
# NOTE(review): this repeats the setup from the "r [AGG]" section line for
# line — confirm it is needed here and is not a leftover copy.
to_fit_ragg = alive.map('to_cumulative', inplace=False)

# The grid is sized from the shape of the frame grabbed at index 0.
grid = ctk.TransformerGrid(to_fit_ragg[0].grab_frame().shape)
# With no random_state or transform_seed the embedding is unseeded, so layouts
# will presumably differ from run to run — TODO confirm.
umap = ctk.UmapConfig(
    n_neighbors=5,
    random_state=None,
    transform_seed=None,
    n_jobs=-1
).get_umap()
print(umap)
# Assign the UMAP object to every cell of the grid.
# NOTE(review): confirm the grid copies it per cell rather than sharing one instance.
grid[:,:] = umap

In [None]:
# Fit the transformer grid on the `new_only` subset of the cumulative archive,
# requested as a new object (inplace=False).
# Presumably that subset holds only individuals new to each generation — TODO confirm.
subset = to_fit_ragg.new_only(inplace=False)
subset.fit_grid(grid)

In [None]:
# Keep every 5th entry on the last axis (presumably generations — TODO
# confirm) and run the fitted grid over that selection.
# NOTE(review): the next cell plots `to_plot_embed`, which this cell does not
# reassign — confirm whether this `to_embed` was meant to feed that plot.
to_embed = to_fit_ragg[:, :, ::5]
to_embed.transform_grid(grid)

In [None]:
# Collapse the current selection into a single plot and export it as a GIF.
#
# NOTE(review): `to_plot_embed` is not reassigned by the three cells directly
# above (they rebuild `to_fit_ragg`, `grid` and `to_embed`). It still holds the
# selection made in the earlier "[AGG] spaces" evolution cell — confirm that is
# the intended data.

# First panel title minus its last 7 characters (presumably a per-panel
# suffix — TODO confirm).
collapsed_title = to_plot_embed.get_2d_titles()[0][0][:-7]
# File-name form of the title: trimmed, lower-cased, commas and spaces -> "_".
gif_stem = collapsed_title.strip().lower().replace(',', '_').replace(' ', '_')

umap_plotter = ctk.GridPlotter()
umap_plotter.add_collapsed_data(data=to_plot_embed.get_2d_data(), title=collapsed_title)
umap_plotter.show()
umap_plotter.to_gif(filepath=f"{config.OUTPUT_FOLDER}/{gif_stem}.gif")

### r [AGG] space [AGG]

In [None]:
# Build the transformer grid for the twice-aggregated archive.
# Every other direct `alive.map(...)` call in this notebook passes
# inplace=False. Doing the same on the first map here guarantees `alive` is
# left untouched whatever map's default is; the second map then works on that
# derived object.
to_fit_rsagg = alive.map('aggregate', inplace=False).map('aggregate')

# The grid is sized from the shape of the frame grabbed at index 0.
grid = ctk.TransformerGrid(to_fit_rsagg[0].grab_frame().shape)
# With no random_state or transform_seed the embedding is unseeded, so layouts
# will presumably differ from run to run — TODO confirm.
umap = ctk.UmapConfig(
    n_neighbors=5,
    random_state=None,
    transform_seed=None,
    n_jobs=-1
).get_umap()
print(umap)
# Assign the UMAP object to every cell of the grid.
# NOTE(review): confirm the grid copies it per cell rather than sharing one instance.
grid[:,:] = umap

In [None]:
# Fit the transformer grid on the `new_only` subset of the twice-aggregated
# archive, requested as a new object (inplace=False).
# Presumably that subset holds only individuals new to each generation — TODO confirm.
subset = to_fit_rsagg.new_only(inplace=False)
subset.fit_grid(grid)

In [None]:
# Keep every 5th entry on the last axis (presumably generations — TODO
# confirm) and run the fitted grid over that selection.
to_embed = to_fit_rsagg[:, :, ::5]
to_embed.transform_grid(grid)

1 r [agg], 1 space [agg] show evolution

In [None]:
# Everything on the first two axes, every 5th entry on the third.
# NOTE(review): `to_embed` was already built with `::5` on the last axis, so
# this second `::5` presumably thins the sequence further — confirm intended.
to_plot_embed = to_embed[:, :, ::5]

# Hand the selection's 2-D data, titles and ids to a grid plotter.
umap_plotter = ctk.GridPlotter()
umap_plotter.add_2D_numeric_data(
    data_2d=to_plot_embed.get_2d_data(), 
    titles_2d=to_plot_embed.get_2d_titles(),
    global_ids_2d=to_plot_embed.get_2d_ids(),
)
# Per-id colours and sizes come from the colour-prep cell; alpha is 0.5 for all.
umap_plotter.add_id_styling(ids=dot_ids, colors=dot_colors, sizes=dot_sizes, alphas=[0.5]*len(dot_ids))
umap_plotter.config.col_space = 0
umap_plotter.config.row_space = 0.2
umap_plotter.transpose()
umap_plotter.set_global_axis_limits(padding=0.1)

# Tick-label blanking is disabled for this figure.
# umap_plotter[:-1, :].set_xticklabels([])
# umap_plotter[:, 1:].set_yticklabels([])
# umap_plotter[:, 1:].set_yticklabels([])
umap_plotter.show()

show evolution in 1 image

In [None]:
# Collapse the current `to_plot_embed` selection into a single plot and export
# it as a GIF under config.OUTPUT_FOLDER.

# First panel title minus its last 7 characters (presumably a per-panel
# suffix — TODO confirm).
collapsed_title = to_plot_embed.get_2d_titles()[0][0][:-7]
# File-name form of the title: trimmed, lower-cased, commas and spaces -> "_".
gif_stem = collapsed_title.strip().lower().replace(',', '_').replace(' ', '_')

umap_plotter = ctk.GridPlotter()
umap_plotter.add_collapsed_data(data=to_plot_embed.get_2d_data(), title=collapsed_title)
umap_plotter.show()
umap_plotter.to_gif(filepath=f"{config.OUTPUT_FOLDER}/{gif_stem}.gif")