# This script uses an evolutionary algorithm to produce the slowest-fixating graphs

In [None]:
# imports
%load_ext autoreload
%autoreload 2

import numpy as np
import joblib
import pandas as pd
from population_graph import PopulationGraph
from analysis.analysis_utils import GRAPH_PROPERTY_COLUMNS


In [None]:
# Seeded generator so that the random draws taken from `rng` are repeatable.
SEED = 42
rng = np.random.default_rng(seed=SEED)

# Size of every graph in the search: node count and edge count.
N_NODES, N_EDGES = 31, 34

# How many random graphs to start from, and how many children each parent gets.
INITIAL_GRAPH_POPULATION, NUMBER_OF_CHILDREN = 5, 5

In [None]:
def add_new_random_graph(graph_zoo: list[PopulationGraph],
                         wl_set: set,
                         n_nodes: int,
                         n_edges: int,
                         name: str,
                         seed=None,
                         max_attempts: int = 1000):
    """Append one random connected graph whose ``wl_hash`` is not yet in ``wl_set``.

    Graphs are generated with ``PopulationGraph.random_connected_graph`` until one
    has a hash that is not None and not already recorded. That graph is appended
    to ``graph_zoo`` and its hash is added to ``wl_set`` (both mutated in place).

    Args:
        graph_zoo: List that receives the new graph.
        wl_set: Set of ``wl_hash`` values already taken; updated in place.
        n_nodes: Number of nodes passed to the generator.
        n_edges: Number of edges passed to the generator.
        name: Name passed to the generator.
        seed: Seed for the first attempt. If that attempt collides, later
            attempts use fresh seeds derived deterministically from ``seed``.
            With ``seed=None`` every attempt passes ``None``.
        max_attempts: Upper bound on generation attempts.

    Returns:
        The same ``wl_set`` object that was passed in.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        RuntimeError: If no unseen graph was produced within ``max_attempts``.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    retry_rng = None
    attempt_seed = seed
    for _ in range(max_attempts):
        new_graph = PopulationGraph.random_connected_graph(
            n_nodes, n_edges, name=name, seed=attempt_seed
        )
        new_wl = new_graph.wl_hash
        if new_wl is not None and new_wl not in wl_set:
            graph_zoo.append(new_graph)
            wl_set.add(new_wl)
            return wl_set

        # Retrying with the same fixed seed would presumably rebuild the very
        # same graph and never terminate (assumes the generator is
        # deterministic for a given seed -- TODO confirm). Derive a new seed
        # for each retry instead; the first attempt still uses `seed` as given.
        if seed is not None:
            if retry_rng is None:
                retry_rng = np.random.default_rng(seed)
            attempt_seed = int(retry_rng.integers(0, 2**32))

    raise RuntimeError(
        f"Could not generate a graph with an unseen wl_hash in {max_attempts} attempts"
    )

def next_generation(graph_zoo: list[PopulationGraph], size: int, rng: np.random.Generator = None):
    """Create ``size`` mutated children for every graph in ``graph_zoo``.

    A child is accepted only if its ``wl_hash`` is not None and differs from the
    hashes of all parents and of all children accepted so far; otherwise the
    parent is mutated again with a new seed.

    Args:
        graph_zoo: Parent graphs. The list is not modified.
        size: Number of children to produce per parent.
        rng: Optional generator used to draw one mutation seed per attempt.
            When None, ``seed=None`` is passed to ``mutate_graph``.

    Returns:
        List of accepted children, grouped by parent in ``graph_zoo`` order.
    """
    wl_set = {graph.wl_hash for graph in graph_zoo}
    new_generation = []
    for graph in graph_zoo:
        for _ in range(size):
            while True:
                # Cast to a plain Python int, matching how seeds are passed to
                # add_new_random_graph elsewhere in this notebook.
                seed = int(rng.integers(0, 2**32)) if rng is not None else None
                new_graph = graph.mutate_graph(seed=seed)
                new_wl = new_graph.wl_hash
                if new_wl is not None and new_wl not in wl_set:
                    break
            new_generation.append(new_graph)
            wl_set.add(new_wl)
    return new_generation

In [None]:
# Load the pre-trained regression pipeline; `lr` is the model used below to score graphs.
# NOTE: joblib.load unpickles the file, which can run arbitrary code -- only load
# model artifacts that come from a trusted source.
lr = joblib.load('./ml_models/linear_regression_pipeline.joblib')

In [None]:
# Build the starting population: INITIAL_GRAPH_POPULATION random connected graphs,
# each with a wl_hash not already present in wl_set.

graph_zoo:list[PopulationGraph] = []  # the initial population of graphs
wl_set = set()  # wl_hash values of the graphs already placed in graph_zoo

for i in range(INITIAL_GRAPH_POPULATION):
    # Every graph gets its own seed drawn from the shared generator; int() turns
    # the NumPy integer into a plain Python int before it is passed on.
    add_new_random_graph(graph_zoo, wl_set, N_NODES, N_EDGES, name=f"random_{i}", seed=int(rng.integers(0, 2**32)))


In [None]:
# One manual round: mutate every graph in graph_zoo, then score parents and
# children together with the loaded model.
new_generation = next_generation(graph_zoo, size=NUMBER_OF_CHILDREN, rng=rng)
all_graphs = graph_zoo + new_generation
graph_properties = []
for graph in all_graphs:
    props = graph.calculate_graph_properties()
    graph_properties.append(props)

# Feature matrix: restrict to GRAPH_PROPERTY_COLUMNS, keep only numeric columns,
# and drop "density" (this raises KeyError if "density" is not among them).
X = pd.DataFrame(graph_properties)[GRAPH_PROPERTY_COLUMNS].select_dtypes(include=[np.number]).drop(columns=["density"])
y_preds = lr.predict(X)
# Bare expression so the notebook displays the predictions.
y_preds


In [None]:
def run_evolution(initial_population, model, n_generations=50, offspring_factor=2, maximize=True, rng=None):
    """
    Run a mutate / score / select evolutionary loop over a population of graphs.

    In every generation each parent is mutated ``offspring_factor`` times, the
    parents and the unique children are scored with ``model``, and the best
    ``len(initial_population)`` graphs survive into the next generation.

    Args:
        initial_population: List of PopulationGraph objects. The list itself is not modified.
        model: The trained ML model (lr) to predict fixation time; must provide ``predict``.
        n_generations: How many loops to run.
        offspring_factor: How many children per parent (e.g., 2 = double population then cull).
        maximize: True for 'slowest' fixation, False for 'fastest'.
        rng: Optional ``np.random.Generator`` used to draw the mutation seeds, which
            makes a run reproducible. When None, seeds come from NumPy's global
            random state, as before.

    Returns:
        A tuple ``(population, history)``. ``population`` holds the survivors of the
        last generation, best first (or a copy of ``initial_population`` when
        ``n_generations`` is 0). ``history`` holds the best predicted score of each
        generation. An empty ``initial_population`` yields ``([], [])``.
    """
    population = list(initial_population)
    history = []

    # Nothing to mutate or score: return early instead of failing on the empty
    # feature frame / empty survivor list further down.
    if not population:
        return population, history

    # Population size stays constant across generations.
    pop_size = len(population)

    # Hashes of every graph ever accepted (including culled ones), so the same
    # topology is never admitted twice.
    seen_hashes = {g.wl_hash for g in population}

    def draw_seed():
        """Return a plain-int mutation seed below 10**9."""
        if rng is None:
            return int(np.random.randint(10**9))
        return int(rng.integers(0, 10**9))

    # NOTE(review): `tqdm` is not imported in the visible notebook cells --
    # confirm it is in scope before calling this function.
    pbar = tqdm(range(n_generations), desc="Evolving Graphs")

    for _generation in pbar:
        # --- 1. Reproduction (Mutation) ---
        offspring = []
        for parent in population:
            for _ in range(offspring_factor):
                # Up to 10 tries to get a child with an unseen hash; if all
                # fail, this child slot is silently skipped (best effort).
                for _attempt in range(10):
                    child = parent.mutate_graph(seed=draw_seed())
                    child_hash = child.wl_hash
                    if child_hash not in seen_hashes:
                        offspring.append(child)
                        seen_hashes.add(child_hash)
                        break

        # Parents compete with their offspring, so the best graph is never lost.
        full_pool = population + offspring

        # --- 2. Evaluation (ML Prediction) ---
        props_list = [g.calculate_graph_properties() for g in full_pool]
        X = pd.DataFrame(props_list)[GRAPH_PROPERTY_COLUMNS].select_dtypes(include=[np.number])
        X = X.drop(columns=["density"], errors="ignore")
        scores = model.predict(X)

        # --- 3. Selection (Elitism) ---
        # Rank best first: largest scores when maximizing (slow fixation),
        # smallest scores when minimizing (fast fixation).
        ranking = np.argsort(scores)
        if maximize:
            ranking = ranking[::-1]
        survivor_indices = ranking[:pop_size]

        population = [full_pool[i] for i in survivor_indices]
        best_score = scores[survivor_indices[0]]

        # Logging
        history.append(best_score)
        pbar.set_postfix({"Best Score": f"{best_score:.2f}", "Pool": len(full_pool)})

    return population, history

In [None]:
# Configuration
GENS = 50
OFFSPRING_FACTOR = 4  # Up to 4 unique children per parent each generation (Exploration)

# Run Evolution
# Starts from graph_zoo and scores graphs with the loaded model `lr`.
final_pop, fitness_history = run_evolution(
    initial_population=graph_zoo, 
    model=lr, 
    n_generations=GENS, 
    offspring_factor=OFFSPRING_FACTOR,
    maximize=True # We want SLOW fixation (Extreme)
)

# fitness_history holds the best score of each generation; the last entry is the final best.
print(f"Best Predicted Fixation Time: {fitness_history[-1]:.2f}")

# Visualize the winner
# run_evolution returns the survivors ordered best first, so index 0 is the top graph.
best_graph = final_pop[0]
print(f"Top Graph Topology: {best_graph.graph}")
# NOTE(review): `nx` is not imported in the visible cells -- confirm that
# `import networkx as nx` has been run before this cell.
nx.draw(best_graph.graph, with_labels=True, node_size=300)