# This script uses an evolutionary algorithm to produce the slowest-fixating graphs.

In [None]:
import os 
os.environ

In [None]:
# imports
%load_ext autoreload
%autoreload 2

import numpy as np
import joblib
import pandas as pd
from population_graph import PopulationGraph
from analysis.analysis_utils import GRAPH_PROPERTY_COLUMNS
from pathlib import Path
from tqdm import tqdm

import matplotlib.pyplot as plt



In [None]:

# Identifier of the simulation batch this notebook works on.
BATCH_NAME = 'batch_large_test_30_02'

# Every path below is resolved against the current working directory.
ROOT = Path.cwd()

# <cwd>/simulation_data/<batch>/ml_models
data_dir = ROOT.joinpath("simulation_data")
BATCH_DIR = data_dir.joinpath(BATCH_NAME)
ML_MODELS_DIR = BATCH_DIR.joinpath("ml_models")

In [None]:
def add_new_random_graph(graph_zoo: list[PopulationGraph],
                         wl_set: set,
                         n_nodes: int,
                         n_edges: int,
                         name: str,
                         seed=None):
    """Append one random connected graph whose WL hash is not yet in ``wl_set``.

    Graphs are drawn with ``PopulationGraph.random_connected_graph`` until one
    has a ``wl_hash`` that is not ``None`` and not already in ``wl_set``. That
    graph is appended to ``graph_zoo`` and its hash is added to ``wl_set``;
    both containers are mutated in place.

    Args:
        graph_zoo: List that receives the accepted graph.
        wl_set: Set of hashes already taken; updated with the new hash.
        n_nodes: Forwarded to ``random_connected_graph``.
        n_edges: Forwarded to ``random_connected_graph``.
        name: Name given to the generated graph.
        seed: Seed forwarded unchanged for the first draw. ``None`` leaves
            every draw unseeded. When a seed is given and the first draw is
            rejected, later draws use seeds derived deterministically from it.

    Returns:
        The same ``wl_set`` object, now containing the new graph's hash.
    """
    retry_rng = None
    attempt_seed = seed
    while True:
        candidate = PopulationGraph.random_connected_graph(
            n_nodes, n_edges, name=name, seed=attempt_seed
        )
        candidate_hash = candidate.wl_hash
        if candidate_hash is not None and candidate_hash not in wl_set:
            break
        if seed is not None:
            # Re-using one fixed seed presumably reproduces the same rejected
            # graph (TODO confirm against random_connected_graph), which would
            # spin forever. Derive a new seed for each retry instead. The
            # generator is created lazily so the accepted-first-draw path is
            # exactly what it was before.
            if retry_rng is None:
                retry_rng = np.random.default_rng(seed)
            attempt_seed = int(retry_rng.integers(0, 2**32))
    graph_zoo.append(candidate)
    wl_set.add(candidate_hash)
    return wl_set


In [None]:
# Create Initial Random Graph Population
SEED = 42
rng = np.random.default_rng(SEED)

N_INITIAL_GRAPH_POPULATION = 10
NUMBER_OF_CHILDREN = 10
GENERATIONS = 100
N_NODES = 31
N_EDGES = 34

random_graph_zoo: list[PopulationGraph] = []
wl_set = set()

for i in range(N_INITIAL_GRAPH_POPULATION):
    new_graph, new_wl = None, None
    # Keep drawing until the graph's WL hash is new to the zoo.
    while new_wl is None or new_wl in wl_set:
        # Draw a fresh seed from `rng` for every attempt: the zoo then follows
        # from SEED instead of being unseeded, and a rejected draw is never
        # retried with the same seed.
        # NOTE(review): reproducibility assumes random_connected_graph is
        # deterministic for a given integer seed — confirm.
        new_graph = PopulationGraph.random_connected_graph(
            N_NODES,
            N_EDGES,
            name=f"random-{i}",
            seed=int(rng.integers(0, 2**32)),
        )
        new_wl = new_graph.wl_hash
    random_graph_zoo.append(new_graph)
    wl_set.add(new_wl)


In [None]:
def run_evolutionary_search_multi_model(
    initial_population: list,
    model,
    model_name,
    secondary_models: "dict | None" = None,
    generations: int = 50,
    pop_size: int = 10,
    n_children: int = 50,
    objective: str = "maximize",
    rng: "np.random.Generator | None" = None,
    max_attempts: int = 10,
):
    """Run a (mu + lambda) evolutionary search over graphs, scored by ``model``.

    Each generation, every graph in the current population is mutated up to
    ``n_children`` times. Children whose ``wl_hash`` has been seen before
    (in the initial population or in any earlier child) are rejected.
    Parents and accepted children are then scored together with
    ``model.predict`` and the best ``pop_size`` graphs survive.

    Args:
        initial_population: Graph objects exposing ``wl_hash``, ``name``,
            ``mutate_graph(seed=..., name=...)`` and
            ``calculate_graph_properties()``. The list is copied, not mutated.
        model: Object with ``predict(X)``; its predictions drive selection.
        model_name: Key under which the primary model's history is stored.
        secondary_models: Optional mapping ``name -> model``. These models are
            only evaluated on each generation's survivors for tracking.
        generations: Number of generations to run.
        pop_size: Number of survivors kept after each generation.
        n_children: Mutation slots per parent per generation.
        objective: ``"maximize"`` or ``"minimize"`` the primary prediction.
        rng: Source of mutation seeds; defaults to ``default_rng(42)``.
        max_attempts: Mutation tries per slot before the slot is given up.

    Returns:
        ``(final_population, history)`` where ``history`` maps each model name
        to a list holding, per generation, the mean prediction over the
        survivors.

    Raises:
        ValueError: If ``objective`` is neither ``"maximize"`` nor
            ``"minimize"``.
    """
    # A misspelled objective used to fall through to "minimize" silently.
    if objective not in ("maximize", "minimize"):
        raise ValueError(
            f"objective must be 'maximize' or 'minimize', got {objective!r}"
        )
    if rng is None:
        rng = np.random.default_rng(42)
    secondary_models = secondary_models or {}

    # 1. Initialize state
    current_pop = initial_population.copy()
    wl_set = {g.wl_hash for g in current_pop}
    prop_cache = {}  # wl_hash -> graph properties, so each graph is measured once

    history = {model_name: []}
    for key in secondary_models:
        history[key] = []

    print(f"Starting Evolution: {generations} generations, optimization: {objective} {model_name}")

    for gen in tqdm(range(generations), desc="Evolving"):

        # --- A. REPRODUCTION ---
        children = []
        for parent in current_pop:
            # Strip any earlier "_gen_<k>" suffix so names do not accumulate.
            child_name = f'{parent.name.split("_")[0]}_gen_{gen}'
            for _ in range(n_children):
                for _attempt in range(max_attempts):
                    seed = rng.integers(0, 2**32)
                    child = parent.mutate_graph(seed=seed, name=child_name)
                    child_hash = child.wl_hash
                    if child_hash not in wl_set:
                        children.append(child)
                        wl_set.add(child_hash)
                        break

        # --- B. EVALUATION ---
        candidates = current_pop + children
        for g in candidates:
            if g.wl_hash not in prop_cache:
                prop_cache[g.wl_hash] = g.calculate_graph_properties()

        # presumably one dict of properties per graph — TODO confirm
        X = pd.DataFrame([prop_cache[g.wl_hash] for g in candidates])
        X = X[GRAPH_PROPERTY_COLUMNS].select_dtypes(include=[np.number])
        if "density" in X.columns:
            X = X.drop(columns=["density"])

        # Flatten so a column-vector prediction still sorts as one score per graph.
        fitness_scores = np.ravel(model.predict(X))

        # --- C. SELECTION ---
        sorted_indices = np.argsort(fitness_scores)
        if objective == "maximize":
            sorted_indices = sorted_indices[::-1]
        top_indices = sorted_indices[:pop_size]
        current_pop = [candidates[i] for i in top_indices]

        # --- D. TRACKING ---
        if secondary_models:
            X_survivors = X.iloc[top_indices]
            for key, secondary in secondary_models.items():
                history[key].append(np.mean(secondary.predict(X_survivors)))

        history[model_name].append(np.mean(fitness_scores[top_indices]))

    return current_pop, history

In [None]:
def plot_multi_model_history(history, main_model_name, secondary_model_names, objective):
    """Plot the per-generation history of the main model and its companions.

    The main model is drawn on the left y-axis. A secondary model shares that
    axis when its name agrees with the main model's on containing
    "Probability"; otherwise it is drawn on a twin right-hand axis.
    Colour encodes the model family ("LR"/"Linear Regression" blue, "XGBOOST"
    red, anything else green). Names containing "Probability" are dashed,
    all others solid.

    Args:
        history: Mapping from model name to the sequence of values to plot.
        main_model_name: Key in ``history`` of the model being optimized.
        secondary_model_names: A single name or an iterable of names, each a
            key in ``history``.
        objective: Objective string; title-cased into the figure title.
    """
    prob_axis_label = "Fixation Probability"
    time_axis_label = "Fixation Time (Steps)"
    family_colors = ('#1f77b4', '#d62728', '#2ca02c')  # blue, red, green

    def is_probability(text):
        return "Probability" in text

    def family_rank(text):
        # 0 = linear regression, 1 = XGBoost, 2 = anything else
        if "LR" in text or "Linear Regression" in text:
            return 0
        if "XGBOOST" in text:
            return 1
        return 2

    def style_for(name):
        dash = "--" if is_probability(name) else "-"
        return family_colors[family_rank(name)], dash

    if isinstance(secondary_model_names, str):
        secondary_model_names = [secondary_model_names]

    fig, left_ax = plt.subplots(figsize=(12, 7))
    main_is_prob = is_probability(main_model_name)

    # Left axis carries the main model's quantity.
    left_ax.set_xlabel("Generation", fontweight='bold')
    left_ax.set_ylabel(
        prob_axis_label if main_is_prob else time_axis_label,
        fontweight='bold',
        color='black',
    )

    main_color, main_dash = style_for(main_model_name)
    plotted = list(
        left_ax.plot(
            history[main_model_name],
            label=f"Avg: {main_model_name} (Main)",
            color=main_color,
            linestyle=main_dash,
            linewidth=3,
        )
    )

    # Right axis carries whichever quantity the main model is not.
    right_ax = left_ax.twinx()
    right_ax.set_ylabel(
        time_axis_label if main_is_prob else prob_axis_label,
        fontweight='bold',
        color='black',
    )

    for sec_name in secondary_model_names:
        same_quantity = is_probability(sec_name) == main_is_prob
        axis = left_ax if same_quantity else right_ax
        sec_color, sec_dash = style_for(sec_name)
        plotted.extend(
            axis.plot(
                history[sec_name],
                label=f"Avg: {sec_name}",
                color=sec_color,
                linestyle=sec_dash,
                linewidth=2.5,
            )
        )

    # Both axes start at 0; the probability axis is capped just above 1.
    if main_is_prob:
        prob_ax, time_ax = left_ax, right_ax
    else:
        prob_ax, time_ax = right_ax, left_ax
    prob_ax.set_ylim(0, 1.05)
    time_ax.set_ylim(bottom=0)

    # One legend for both axes, ordered by family, then non-probability first.
    ordered = sorted(
        plotted,
        key=lambda ln: (family_rank(ln.get_label()), int(is_probability(ln.get_label()))),
    )
    left_ax.legend(
        ordered,
        [ln.get_label() for ln in ordered],
        loc='upper center',
        bbox_to_anchor=(0.5, -0.15),
        fancybox=True,
        shadow=True,
        ncol=2,
    )

    left_ax.grid(True, linestyle=':', alpha=0.7)
    plt.title(f"Evolution of Topologies\nOptimizing: {objective.title()} {main_model_name}", fontsize=14, pad=15)
    fig.tight_layout()
    plt.show()

In [None]:

# Display names of the trained models; they double as keys of `models`.
TIME_LR_MODEL = "LR Fixation Time"
TIME_XGBOOST_MODEL = "XGBOOST Fixation Time"
PROB_LR_MODEL = "LR Fixation Probability"
PROB_XGBOOST_MODEL = "XGBOOST Fixation Probability"

# Load every model from ML_MODELS_DIR, in the order listed.
# NOTE: joblib.load unpickles its input, so only load files you trust.
models = {
    label: joblib.load(ML_MODELS_DIR / filename)
    for label, filename in (
        (TIME_LR_MODEL, 'mean_steps_linear_regression_pipeline.joblib'),
        (TIME_XGBOOST_MODEL, 'mean_steps_xgboost_model.joblib'),
        (PROB_LR_MODEL, 'prob_fixation_linear_regression_pipeline.joblib'),
        (PROB_XGBOOST_MODEL, 'prob_fixation_xgboost_model.joblib'),
    )
}


In [None]:
# Run the evolutionary search once per model: that model drives selection
# while all the others are only tracked on the survivors.
for model_name in models:
    objective = "maximize"  # either 'maximize' or 'minimize'

    # Everything except the driving model is a passive observer.
    secondary_models = dict(
        (name, other) for name, other in models.items() if name != model_name
    )

    # 1. Configuration
    params = dict(
        initial_population=random_graph_zoo,   # every run starts from the same random zoo
        model=models[model_name],              # model whose prediction is optimized
        model_name=model_name,
        secondary_models=secondary_models,
        generations=GENERATIONS,               # number of generations to evolve
        pop_size=N_INITIAL_GRAPH_POPULATION,   # survivors kept per generation
        n_children=NUMBER_OF_CHILDREN,         # mutation slots per surviving graph
        objective=objective,
        rng=rng,
    )

    # 2. Run
    final_pop_1, history = run_evolutionary_search_multi_model(**params)

    # 3. Plot the histories and persist the surviving graphs
    plot_multi_model_history(
        history,
        main_model_name=model_name,
        secondary_model_names=list(secondary_models),
        objective=objective,
    )
    winner_graph_zoo_file = Path('graph_zoos') / f'extreme_{objective}_{model_name.replace(" ", "_")}.joblib'
    winner_graph_zoo_file.parent.mkdir(parents=True, exist_ok=True)
    joblib.dump(final_pop_1, winner_graph_zoo_file)

In [None]:
# Draw every graph of the initial random population.
for graph in random_graph_zoo:
    graph.draw()

In [None]:
# Draw the surviving graphs. `final_pop_1` is rebound on every pass of the
# per-model run loop, so this shows only the last model's final population.
for graph in final_pop_1:
    graph.draw()