<a href="https://colab.research.google.com/github/stepbot/multiScaleEvolutionarySearch/blob/master/Meta_Evolving_ML_EA_training_loop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# -*- coding: utf-8 -*-
"""
Meta-Evolving Genetic Algorithm Operators with LLMs (V2)

This script implements a meta-evolutionary algorithm where the core evolutionary
operator itself is evolved by an LLM.

Instead of evolving separate 'crossover' and 'mutate' functions, we evolve a
single, holistic function: `generate_next_population`. This function takes the
current population and their fitness scores and is responsible for producing
the entire next generation.

This approach gives the LLM maximum creative freedom to discover novel
evolutionary strategies, potentially departing from traditional GA structures.
"""

# @title 1. Install Dependencies
!pip install -q -U google-generativeai openai tqdm matplotlib

# @title 2. Setup and Imports
import google.generativeai as genai
import openai
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm.notebook import tqdm
import random
import textwrap
from IPython.display import Markdown, display
import itertools
import os
import traceback

# @title 3. Configure LLM APIs
# @markdown Add your API keys to the Colab secrets manager (key icon on the left).
# @markdown - `GOOGLE_API_KEY` for Gemini
# @markdown - `OPENAI_API_KEY` for OpenAI
# @markdown The script will use any and all configured clients.

llm_clients = []

try:
    from google.colab import userdata
    # Configure Gemini
    GOOGLE_API_KEY = userdata.get('gemini_key')
    if GOOGLE_API_KEY:
        genai.configure(api_key=GOOGLE_API_KEY)
        # Use the updated model name here
        llm_clients.append(genai.GenerativeModel('gemini-2.5-pro'))
        print("✅ Successfully configured and added Gemini client.")

        # --- Debug: List available Gemini models ---
        print("\n--- Available Gemini Models (for 'generateContent') ---")
        # The genai.list_models() function gets all models
        for m in genai.list_models():
            if 'generateContent' in m.supported_generation_methods:
                print(m.name)
        print("-----------------------------------------------------\n")
        # --- End Debug ---

    else:
        print("⚠️ Warning: GOOGLE_API_KEY not found in Colab secrets. Gemini client will not be used.")

    # Configure OpenAI
    OPENAI_API_KEY = userdata.get('openai_key')
    if OPENAI_API_KEY:
        # It's good practice to set the env variable for some libraries
        os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY
        openai_client = openai.OpenAI() # Create a client instance first
        llm_clients.append(openai_client)
        print("✅ Successfully configured and added OpenAI client.")

        # --- Debug: List available OpenAI models ---
        print("\n--- Available OpenAI Models (GPT models) ---")
        try:
            # The client.models.list() function gets all models
            model_list = [m.id for m in openai_client.models.list() if 'gpt' in m.id.lower()]
            for model_name in sorted(model_list):
                print(model_name)
        except Exception as e:
            print(f"Could not retrieve OpenAI models: {e}")
        print("-------------------------------------------\n")
        # --- End Debug ---

    else:
        print("⚠️ Warning: OPENAI_API_KEY not found in Colab secrets. OpenAI client will not be used.")

except ImportError:
    print("❌ Could not import userdata. Please run this in a Google Colab environment and add API keys to secrets.")

def call_llm_api(client, prompt, model_name_gemini='gemini-2.5-pro', model_name_openai='gpt-5'):
    """A unified function to call either Gemini or OpenAI API."""
    client_name = client.__class__.__module__.split('.')[0]
    if client_name == 'google':
        model = genai.GenerativeModel(model_name_gemini)
        response = model.generate_content(prompt)
        return response.text
    elif client_name == 'openai':
        response = client.chat.completions.create(
            model=model_name_openai,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that only returns Python code."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content
    else:
        raise ValueError(f"Unknown client type: {client_name}")


# @title 4. Define the Machine Learning Problem (Inner Loop)

# --- Configuration ---
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
INPUT_SIZE = 28 * 28
NUM_CLASSES = 10
BATCH_SIZE = 1024

# --- Model Architecture ---
class SimpleNet(nn.Module):
    """A simple MLP suitable for MNIST, used in the inner GA."""
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(INPUT_SIZE, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, NUM_CLASSES)

    def forward(self, x):
        x = x.view(-1, INPUT_SIZE)
        out = self.fc1(x)
        out = self.relu1(out)
        out = self.fc2(out)
        out = self.relu2(out)
        out = self.fc3(out)
        return out

# --- Data Loading & Evaluation ---
def get_data_loaders():
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])
    train_dataset = torchvision.datasets.MNIST(root='./data', train=True, transform=transform, download=True)
    test_dataset = torchvision.datasets.MNIST(root='./data', train=False, transform=transform, download=True)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)
    return train_loader, test_loader

def evaluate_nn_model(model, loader):
    model.eval()
    with torch.no_grad():
        correct, total = 0, 0
        for images, labels in loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total


# @title 5. Define the Meta-Evolutionary Algorithm (Outer Loop)

class OperatorIndividual:
    """
    Represents a single evolutionary operator, now with detailed,
    multi-run, per-generation performance history.
    """
    def __init__(self, evolution_operator_code):
        self.evolution_operator_code = evolution_operator_code
        self.run_histories = [] # Stores detailed history from multiple runs
        self.fitness = 0.0      # A single representative score for sorting/selection

    def update_history(self, new_run_histories):
        """Adds new run histories and recalculates the primary fitness score."""
        if not new_run_histories:
            return
        self.run_histories.extend(new_run_histories)
        self._calculate_primary_fitness()

    def _calculate_primary_fitness(self):
        """
        Calculates a single fitness score for elitism and selection.
        We define this as the average of the 'best' fitness from the
        final generation of each run. This rewards operators that
        consistently produce high-performing individuals.
        """
        if not self.run_histories:
            self.fitness = 0.0
            return

        final_bests = [run[-1]['best'] for run in self.run_histories if run]
        if final_bests:
            self.fitness = sum(final_bests) / len(final_bests)
        else:
            self.fitness = 0.0

    def get_performance_summary_text(self):
        """
        Processes the detailed run histories into a clean, averaged,
        human-readable summary table for the LLM prompt.
        """
        if not self.run_histories:
            return "This is a new operator that has not been evaluated yet. Your goal is to create a strong initial version."

        # Average the stats across all runs to create a single, clear time-series
        num_runs = len(self.run_histories)
        # Ensure all runs have the same number of generations
        if not all(len(run) == len(self.run_histories[0]) for run in self.run_histories):
             return "Error: Inconsistent history data."
        num_gens = len(self.run_histories[0])

        avg_history = [{"best": 0.0, "avg": 0.0, "worst": 0.0} for _ in range(num_gens)]

        for run in self.run_histories:
            for i in range(num_gens):
                avg_history[i]["best"] += run[i]["best"]
                avg_history[i]["avg"] += run[i]["avg"]
                avg_history[i]["worst"] += run[i]["worst"]

        summary_lines = [
            f"This operator's performance has been recorded over **{num_runs}** separate runs.",
            "The table below shows the fitness dynamics, **averaged across all runs**.",
            "Fitness is based on the negative loss on training batches (higher is better).\n",
            "| Gen | Best Fitness | Avg Fitness  | Worst Fitness | Spread (Diversity) |",
            "|:---:|:------------:|:------------:|:-------------:|:------------------:|",
        ]

        for i in range(num_gens):
            gen_stats = avg_history[i]
            avg_best = gen_stats['best'] / num_runs
            avg_avg = gen_stats['avg'] / num_runs
            avg_worst = gen_stats['worst'] / num_runs
            spread = avg_best - avg_worst
            summary_lines.append(f"| {i:^3} | {avg_best:12.4f} | {avg_avg:12.4f} | {avg_worst:13.4f} | {spread:18.4f} |")

        summary_lines.extend([
            "\n**Analysis Hints for Your Evolution:**",
            "- **Rate of Improvement:** Analyze the slope of the `Best Fitness` column. A steep, consistent increase is ideal.",
            "- **Population Diversity:** The `Spread (Best-Worst)` column is a proxy for diversity. If it collapses to near-zero too quickly, the population has prematurely converged, and you should consider changes that increase exploration (e.g., higher mutation, different selection).",
            "- **Stability:** Smooth, predictable improvements indicate a stable operator. Jagged or erratic values might suggest the operator is too chaotic."
        ])

        return "\n".join(summary_lines)

    def __str__(self):
        """Provides a compact summary for console logging."""
        if not self.run_histories:
            return f"Fitness: Not yet evaluated\n--- Operator Code ---\n{self.evolution_operator_code}"
        return (f"Overall Fitness (Avg Final Best): {self.fitness:.4f}\n"
                f"Evaluated over {len(self.run_histories)} runs.\n"
                f"--- Operator Code ---\n{self.evolution_operator_code}")


def selection(population, tournament_size=3):
    """Selects a parent from the population using tournament selection based on its primary fitness score."""
    if len(population) < tournament_size:
        return random.choice(population)
    tournament = random.sample(population, tournament_size)
    # The 'fitness' attribute now represents consistent high performance
    tournament.sort(key=lambda x: x.fitness, reverse=True)
    return tournament[0]

def llm_evolve_operator(operator_individual, clients):
    """Uses a randomly selected LLM client to evolve the evolutionary operator, providing detailed generational feedback."""
    if not clients:
        print("Warning: No LLM clients configured. Returning original operator.")
        return OperatorIndividual(operator_individual.evolution_operator_code)

    client = random.choice(clients)
    client_name = client.__class__.__module__.split('.')[0]

    # ✨ NEW: Generate the detailed performance summary ✨
    performance_feedback = operator_individual.get_performance_summary_text()

    prompt = f"""
You are an expert in genetic algorithms. Your task is to evolve a Python function that acts as a holistic evolutionary operator.
This function, `generate_next_population`, is the complete engine of a genetic algorithm, responsible for selection, crossover, and mutation to create the next generation of neural networks.

**Performance Feedback:**
{performance_feedback}

Based on these detailed generational dynamics, your goal is to generate a new, improved version of the operator code.
- If the operator shows **good, stable improvement**, consider a subtle refinement.
- If the operator **stagnates or converges too fast** (low spread), consider changes that increase exploration or diversity.
- If the operator is **unstable or performs poorly**, a more radical change to the evolutionary strategy might be needed.

The function signature MUST BE `def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):`.
It must return a new list of models of the same size as the input population.

**Current Operator Code:**
```python
{operator_individual.evolution_operator_code}
```
Return only the Python code block for the new, evolved function. Do not include explanations or markdown formatting.
"""
    try:
        print(f"--- Calling {client_name.capitalize()} API to evolve operator (with generational feedback) ---")
        if client_name == 'google':
            # Updated model name for best performance
            model = genai.GenerativeModel('gemini-2.5-pro')
            response = model.generate_content(prompt)
            evolved_code = response.text
        elif client_name == 'openai':
            response = client.chat.completions.create(
                model="gpt-5", # Use a strong model for this complex task
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that only returns Python code."},
                    {"role": "user", "content": prompt}
                ]
            )
            evolved_code = response.choices[0].message.content
    except Exception as e:
        print(f"Warning: API call to {client_name.capitalize()} failed: {e}. Returning original operator.")

    return OperatorIndividual(evolved_code)

def repair_operator_with_llm(faulty_code, error_trace, clients):
    """
    Attempts to repair a faulty evolutionary operator using an LLM.
    """
    if not clients:
        print("Warning: No LLM clients configured. Cannot repair operator.")
        return faulty_code

    client = random.choice(clients)
    client_name = client.__class__.__module__.split('.')[0]

    prompt = f"""
You are an expert Python programmer specializing in genetic algorithms. The following Python code for a `generate_next_population` function has failed with an error.

Your task is to analyze the code and the stack trace to identify the bug and provide a corrected version of the function.

**Faulty Code:**
```python
{faulty_code}
```

**Error Stack Trace:**
```
{error_trace}
```

Please provide only the corrected Python code block for the function. Do not include explanations or markdown formatting.
"""

    try:
        print(f"--- Calling {client_name.capitalize()} API to repair operator ---")
        if client_name == 'google':
            model = genai.GenerativeModel('gemini-2.5-pro')
            response = model.generate_content(prompt)
            repaired_code = response.text
        elif client_name == 'openai':
            response = client.chat.completions.create(
                model="gpt-5",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that only returns Python code."},
                    {"role": "user", "content": prompt}
                ]
            )
            repaired_code = response.choices[0].message.content

        return repaired_code
    except Exception as e:
        print(f"Warning: API call to {client_name.capitalize()} for repair failed: {e}. Returning original faulty code.")
        return faulty_code

def create_initial_operator_population(size, clients):
    """
    Creates a diverse first generation of operators.
    It starts with one seed function and uses the LLM to generate variations for the rest.
    """
    print(f"Creating a diverse initial population of size {size} using the LLM...")

    # The seed function remains the same standard GA
    initial_evolution_code = """

def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    '''
    A standard Genetic Algorithm implementation for generating the next population.
    It uses elitism, tournament selection, uniform crossover, and reset mutation.
    '''
    POPULATION_SIZE = len(current_population)
    ELITISM_RATE = 0.05
    TOURNAMENT_SIZE = 5

    # --- Helper: Crossover ---
    def crossover(parent1, parent2, device):
        child = SimpleNet().to(device)
        parent1_dict = parent1.state_dict()
        parent2_dict = parent2.state_dict()
        child_dict = child.state_dict()
        for key in parent1_dict.keys():
            mask = torch.randint(0, 2, size=parent1_dict[key].shape, device=device).float()
            child_dict[key] = (parent1_dict[key] * mask) + (parent2_dict[key] * (1 - mask))
        child.load_state_dict(child_dict)
        return child

    # --- Helper: Mutate ---
    def mutate(model, device):
        MUTATION_RESET_PROB = 0.00001
        with torch.no_grad():
            for param in model.parameters():
                mask = torch.rand_like(param.data) < MUTATION_RESET_PROB
                new_weights = torch.randn_like(param.data)
                param.data[mask] = new_weights[mask]
        return model

    # --- Helper: Tournament Selection ---
    def tournament_selection(population, scores):
        tournament_indices = torch.randint(0, len(population), (TOURNAMENT_SIZE,))
        winner_idx = tournament_indices[torch.argmax(scores[tournament_indices])]
        return population[winner_idx]

    # --- Main Generation Logic ---
    sorted_indices = torch.argsort(fitness_scores, descending=True)
    num_elite = int(POPULATION_SIZE * ELITISM_RATE)
    new_population = [current_population[i] for i in sorted_indices[:num_elite]]

    num_children_to_create = POPULATION_SIZE - num_elite
    for _ in range(num_children_to_create):
        parent1 = tournament_selection(current_population, fitness_scores)
        parent2 = tournament_selection(current_population, fitness_scores)
        child = crossover(parent1, parent2, device)
        child = mutate(child, device)
        new_population.append(child)

    return new_population
"""
    # Create the first individual from the seed code
    seed_individual = OperatorIndividual(initial_evolution_code)

    # Initialize the population with the seed
    population = [seed_individual]

    # Use the LLM to generate the rest of the initial population for diversity
    if size > 1:
        print(f"Generating {size - 1} initial variations from the seed operator...")
        # A progress bar is helpful here since this involves multiple API calls
        for _ in tqdm(range(size - 1), desc="Bootstrapping Initial Population"):
            evolved_individual = llm_evolve_operator(seed_individual, clients)
            population.append(evolved_individual)

    print("Initial population created.")
    return population

def selection(population, tournament_size=3):
    """Selects a parent from the population using tournament selection."""
    if len(population) < tournament_size:
        return random.choice(population)
    tournament = random.sample(population, tournament_size)
    tournament.sort(key=lambda x: x.fitness, reverse=True)
    return tournament[0]


# @title 6. Fitness Evaluation: Running the Inner GA
def run_inner_ga_for_fitness(operator_individual, train_loader, test_loader,clients):
    """
    Evaluates an OperatorIndividual by running a GA using its evolved operator.
    This version captures detailed per-generation statistics (best, avg, worst fitness)
    for each run to analyze the operator's behavior over time.
    """
    # --- Parameters ---
    NUM_EVAL_RUNS = 3 # Run the inner GA 3 times to average out randomness
    # --- Inner GA Parameters ---
    POPULATION_SIZE = 50
    GENERATIONS = 10
    BATCHES_PER_GENERATION = 5
    MAX_REPAIR_ATTEMPTS = 1
    for attempt in range(MAX_REPAIR_ATTEMPTS + 1):
        try:
            local_scope = {}
            exec(operator_individual.evolution_operator_code, {}, local_scope)
            generate_next_population_fn = local_scope['generate_next_population']
            # If exec is successful, break the loop
            break
        except Exception as e:
            if attempt < MAX_REPAIR_ATTEMPTS:
                print(f"Error executing evolved code. Attempting LLM repair ({attempt + 1}/{MAX_REPAIR_ATTEMPTS}).")
                error_trace = traceback.format_exc()
                repaired_code = repair_operator_with_llm(operator_individual.evolution_operator_code, error_trace, clients)
                operator_individual.evolution_operator_code = repaired_code
            else:
                print(f"Error executing evolved code after repair attempts: {e}")
                return None # Return None on final failure

    all_runs_histories = []
    print(f"Starting {NUM_EVAL_RUNS} detailed evaluation runs for this operator...")

    for run_num in range(NUM_EVAL_RUNS):
        print(f"  > Run {run_num + 1}/{NUM_EVAL_RUNS}...")
        # --- Inner GA Execution ---
        loss_fn = nn.CrossEntropyLoss()
        population = [SimpleNet().to(DEVICE) for _ in range(POPULATION_SIZE)]
        streams = [torch.cuda.Stream() for _ in range(POPULATION_SIZE)] if torch.cuda.is_available() else []
        train_iterator = iter(itertools.cycle(train_loader))

        run_history = []

        for generation in range(GENERATIONS):
            fitness_scores = torch.zeros(POPULATION_SIZE, device=DEVICE)
            for _ in range(BATCHES_PER_GENERATION):
                images, labels = next(train_iterator)
                images, labels = images.to(DEVICE), labels.to(DEVICE)
                with torch.no_grad():
                    if torch.cuda.is_available():
                        for i, model in enumerate(population):
                            with torch.cuda.stream(streams[i]):
                                outputs = model(images)
                                loss = loss_fn(outputs, labels)
                                fitness_scores[i] += -loss # Fitness is negative loss
                    else: # CPU fallback
                         for i, model in enumerate(population):
                            outputs = model(images)
                            loss = loss_fn(outputs, labels)
                            fitness_scores[i] += -loss

            if torch.cuda.is_available():
                torch.cuda.synchronize()

            # ✨ NEW: Track detailed stats for this generation ✨
            best_fitness_gen = fitness_scores.max().item()
            worst_fitness_gen = fitness_scores.min().item()
            avg_fitness_gen = fitness_scores.mean().item()
            run_history.append({
                "generation": generation,
                "best": best_fitness_gen,
                "avg": avg_fitness_gen,
                "worst": worst_fitness_gen
            })

            # --- Evolve the population using the LLM's operator ---
            population = generate_next_population_fn(population, fitness_scores, DEVICE, torch, SimpleNet)

        all_runs_histories.append(run_history)

        # We calculate final test accuracy to report progress, but it is NOT the primary fitness metric.
        # The generational dynamics (run_history) are the key feedback for the LLM.
        best_model = population[torch.argmax(fitness_scores)]
        final_acc = evaluate_nn_model(best_model, test_loader)
        print(f"    Run {run_num + 1} finished. Final test accuracy of best model: {final_acc:.2f}%")

    # The detailed history is now the primary output of the evaluation
    return all_runs_histories


# @title 7. Run the Main Meta-Evolutionary Loop
def main():
    """The main function to run the outer (meta) EA."""
    if not llm_clients:
        print("Fatal: No LLM clients were configured. Please add API keys to Colab secrets and restart the runtime.")
        return

    # --- Meta-EA Parameters ---
    META_POPULATION_SIZE = 6
    META_GENERATIONS = 4
    MUTATION_RATE = 0.5

    print(f"Using device: {DEVICE}")
    if not torch.cuda.is_available():
        print("Warning: CUDA not available. This script is very slow on CPU.")

    train_loader, test_loader = get_data_loaders()

    operator_population = create_initial_operator_population(META_POPULATION_SIZE, llm_clients)

    for gen in range(META_GENERATIONS):
        print(f"\n{'='*25} META-GENERATION {gen + 1}/{META_GENERATIONS} {'='*25}")
        print("Evolving the `generate_next_population` operator...")

        for i, individual in enumerate(operator_population):
                # Only evaluate individuals that have no history
                if not individual.run_histories:
                    print(f"\n--- Evaluating Operator Individual {i+1}/{META_POPULATION_SIZE} ---")
                    display(Markdown(f"```python\n{individual.evolution_operator_code}\n```"))
                    # The function now returns a list of detailed run histories
                    new_histories = run_inner_ga_for_fitness(individual, train_loader, test_loader, llm_clients)

                    if new_histories is None:
                        # Assign negative infinity fitness on failure
                        individual.fitness = -float('inf')
                        print(f"Finished evaluation. Operator {i+1} failed and was assigned a fitness of -inf.")
                    else:
                        # Use the new method to update the individual's history and fitness
                        individual.update_history(new_histories)
                        print(f"Finished evaluation. Operator {i+1} primary fitness: {individual.fitness:.4f}")

        # Sort by the primary fitness metric for elitism
        operator_population.sort(key=lambda x: x.fitness, reverse=True)

        print(f"\n--- Meta-Generation {gen+1} Results ---")
        best_op = operator_population[0]
        print(f"Best Operator Fitness (Avg Final Best): {best_op.fitness:.4f}")
        print("Best Performing Operator's Code:")
        display(Markdown(f"```python\n{best_op.evolution_operator_code}\n```"))
        print("Best Operator's Performance Summary:")
        display(Markdown(best_op.get_performance_summary_text()))


        next_generation = []
        next_generation.append(best_op) # Elitism

        while len(next_generation) < META_POPULATION_SIZE:
            parent = selection(operator_population)
            if random.random() < MUTATION_RATE:
                child = llm_evolve_operator(parent, llm_clients)
            else:
                # If not mutating, create a fresh copy with the parent's history
                child = OperatorIndividual(parent.evolution_operator_code)
                child.run_histories = parent.run_histories[:] # Copy history
                child.fitness = parent.fitness
            next_generation.append(child)

        operator_population = next_generation

    print("\nMeta-Evolution finished!")
    print("Final Best Performing Operator:")
    display(Markdown(str(operator_population[0])))

if __name__ == "__main__":
    main()

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m951.0/951.0 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.7/8.7 MB[0m [31m74.0 MB/s[0m eta [36m0:00:00[0m
[?25h✅ Successfully configured and added Gemini client.

--- Available Gemini Models (for 'generateContent') ---
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-2.5-pro-preview-03-25
models/gemini-2.5-flash-preview-05-20
models/gemini-2.5-flash
models/gemini-2.5-flash-lite-preview-06-17
models/gemini-2.5-pro-preview-05-06
models/gemini-2.5-pro-preview-06-05
models/gemini-2.5-pro
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
mod

100%|██████████| 9.91M/9.91M [00:00<00:00, 18.9MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 509kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.70MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 8.25MB/s]

Creating a diverse initial population of size 6 using the LLM...
Generating 5 initial variations from the seed operator...





Bootstrapping Initial Population:   0%|          | 0/5 [00:00<?, ?it/s]

--- Calling Openai API to evolve operator (with generational feedback) ---
--- Calling Openai API to evolve operator (with generational feedback) ---
--- Calling Google API to evolve operator (with generational feedback) ---
--- Calling Openai API to evolve operator (with generational feedback) ---
--- Calling Openai API to evolve operator (with generational feedback) ---
Initial population created.

Evolving the `generate_next_population` operator...

--- Evaluating Operator Individual 1/6 ---


```python


def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    '''
    A standard Genetic Algorithm implementation for generating the next population.
    It uses elitism, tournament selection, uniform crossover, and reset mutation.
    '''
    POPULATION_SIZE = len(current_population)
    ELITISM_RATE = 0.05
    TOURNAMENT_SIZE = 5

    # --- Helper: Crossover ---
    def crossover(parent1, parent2, device):
        child = SimpleNet().to(device)
        parent1_dict = parent1.state_dict()
        parent2_dict = parent2.state_dict()
        child_dict = child.state_dict()
        for key in parent1_dict.keys():
            mask = torch.randint(0, 2, size=parent1_dict[key].shape, device=device).float()
            child_dict[key] = (parent1_dict[key] * mask) + (parent2_dict[key] * (1 - mask))
        child.load_state_dict(child_dict)
        return child

    # --- Helper: Mutate ---
    def mutate(model, device):
        MUTATION_RESET_PROB = 0.00001
        with torch.no_grad():
            for param in model.parameters():
                mask = torch.rand_like(param.data) < MUTATION_RESET_PROB
                new_weights = torch.randn_like(param.data)
                param.data[mask] = new_weights[mask]
        return model

    # --- Helper: Tournament Selection ---
    def tournament_selection(population, scores):
        tournament_indices = torch.randint(0, len(population), (TOURNAMENT_SIZE,))
        winner_idx = tournament_indices[torch.argmax(scores[tournament_indices])]
        return population[winner_idx]

    # --- Main Generation Logic ---
    sorted_indices = torch.argsort(fitness_scores, descending=True)
    num_elite = int(POPULATION_SIZE * ELITISM_RATE)
    new_population = [current_population[i] for i in sorted_indices[:num_elite]]

    num_children_to_create = POPULATION_SIZE - num_elite
    for _ in range(num_children_to_create):
        parent1 = tournament_selection(current_population, fitness_scores)
        parent2 = tournament_selection(current_population, fitness_scores)
        child = crossover(parent1, parent2, device)
        child = mutate(child, device)
        new_population.append(child)

    return new_population

```

Starting 3 detailed evaluation runs for this operator...
  > Run 1/3...
    Run 1 finished. Final test accuracy of best model: 10.29%
  > Run 2/3...
    Run 2 finished. Final test accuracy of best model: 20.90%
  > Run 3/3...
    Run 3 finished. Final test accuracy of best model: 10.69%
Finished evaluation. Operator 1 primary fitness: -11.3468

--- Evaluating Operator Individual 2/6 ---


```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Holistic evolutionary operator: adaptive elitism, selection, mixed crossover, and adaptive mutation with immigrants.
    """
    POP_SIZE = len(current_population)

    # --- Ensure tensor scores on device ---
    if not torch.is_tensor(fitness_scores):
        scores = torch.tensor(fitness_scores, dtype=torch.float32, device=device)
    else:
        scores = fitness_scores.to(device=device, dtype=torch.float32)

    # --- Fitness statistics for adaptive behavior ---
    mean_abs = scores.abs().mean()
    std = scores.std(unbiased=False)
    coeff_var = float((std / (mean_abs + 1e-8)).item())

    # --- Base hyperparameters ---
    BASE_ELITISM = 0.10
    BASE_TOURNAMENT_FRAC = 0.15
    BASE_GAUSS_PROB = 0.02
    BASE_GAUSS_SCALE = 0.10
    BASE_RESET_PROB = 1e-4
    BASE_LAYER_RESET_RATE = 0.01
    BASE_LAYER_NOISE_RATE = 0.02
    BASE_IMMIGRANT_RATE = 0.05
    EXPLORE_PARENT_PROB = 0.15  # probability to pick random parent instead of tournament

    # --- Adapt parameters based on stability (coeff_var) ---
    if coeff_var < 0.05:
        # Stagnation: increase exploration, more immigrants, stronger mutation
        elitism_rate = max(0.03, BASE_ELITISM * 0.5)
        tournament_frac = max(0.08, BASE_TOURNAMENT_FRAC * 0.7)
        gauss_prob = min(0.08, BASE_GAUSS_PROB * 2.0)
        gauss_scale = BASE_GAUSS_SCALE * 1.5
        reset_prob = BASE_RESET_PROB * 3.0
        layer_reset_rate = BASE_LAYER_RESET_RATE * 2.0
        layer_noise_rate = BASE_LAYER_NOISE_RATE * 1.5
        immigrant_rate = max(0.10, BASE_IMMIGRANT_RATE * 2.0)
        explore_parent_prob = 0.30
    elif coeff_var > 0.50:
        # Unstable: more exploitation, tamer mutation, slightly stronger elitism
        elitism_rate = min(0.18, BASE_ELITISM * 1.5)
        tournament_frac = min(0.25, BASE_TOURNAMENT_FRAC * 1.3)
        gauss_prob = max(0.005, BASE_GAUSS_PROB * 0.5)
        gauss_scale = BASE_GAUSS_SCALE * 0.75
        reset_prob = BASE_RESET_PROB * 0.5
        layer_reset_rate = max(1e-4, BASE_LAYER_RESET_RATE * 0.5)
        layer_noise_rate = max(0.005, BASE_LAYER_NOISE_RATE * 0.5)
        immigrant_rate = max(0.02, BASE_IMMIGRANT_RATE * 0.5)
        explore_parent_prob = 0.10
    else:
        # Stable improvement: gentle refinement
        elitism_rate = BASE_ELITISM
        tournament_frac = BASE_TOURNAMENT_FRAC
        gauss_prob = BASE_GAUSS_PROB
        gauss_scale = BASE_GAUSS_SCALE
        reset_prob = BASE_RESET_PROB
        layer_reset_rate = BASE_LAYER_RESET_RATE
        layer_noise_rate = BASE_LAYER_NOISE_RATE
        immigrant_rate = BASE_IMMIGRANT_RATE
        explore_parent_prob = EXPLORE_PARENT_PROB

    # Clamp values
    tournament_size = max(3, min(POP_SIZE, int(round(POP_SIZE * tournament_frac))))
    num_elite = max(1, int(POP_SIZE * elitism_rate))
    num_immigrants = int(POP_SIZE * immigrant_rate)
    num_immigrants = max(0, min(POP_SIZE - num_elite, num_immigrants))

    # --- Helpers ---
    def clone_model_from(src_model):
        m = SimpleNet().to(device)
        m.load_state_dict(src_model.state_dict())
        return m

    def tournament_selection(population, scores_tensor):
        if POP_SIZE == 1:
            return population[0]
        idxs = torch.randint(0, POP_SIZE, (tournament_size,), device=device)
        sub_scores = scores_tensor[idxs]
        winner_local = torch.argmax(sub_scores)
        winner_idx = int(idxs[winner_local].item())
        return population[winner_idx]

    def select_parent(population, scores_tensor):
        # Exploration: random parent with some probability
        if torch.rand((), device=device).item() < explore_parent_prob:
            ridx = int(torch.randint(0, POP_SIZE, (1,), device=device).item())
            return population[ridx]
        return tournament_selection(population, scores_tensor)

    # Mixed crossover: uniform, blend, or tensor-swap chosen per tensor
    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        p1 = parent1.state_dict()
        p2 = parent2.state_dict()
        cdict = child.state_dict()

        for k in cdict.keys():
            t1 = p1[k]
            t2 = p2[k]
            if not torch.is_floating_point(t1):
                # For integer/bool buffers (e.g., num_batches_tracked), pick from a parent
                if torch.rand((), device=device).item() < 0.5:
                    cdict[k] = t1.clone()
                else:
                    cdict[k] = t2.clone()
                continue

            # Choose crossover mode for this tensor
            r = torch.rand((), device=device).item()
            if r < 0.5:
                # Uniform crossover (per-element)
                mask = torch.rand_like(t1) < 0.5
                cdict[k] = torch.where(mask, t1, t2)
            elif r < 0.9:
                # Arithmetic blend crossover
                alpha = torch.rand_like(t1)
                cdict[k] = alpha * t1 + (1.0 - alpha) * t2
            else:
                # Whole-tensor parent swap
                cdict[k] = t1.clone() if (torch.rand((), device=device).item() < 0.5) else t2.clone()

        child.load_state_dict(cdict)
        return child

    # Adaptive mutation: gaussian noise, element resets, occasional layer resets/noise
    fresh_template_state = SimpleNet().to(device).state_dict()

    def mutate(model):
        with torch.no_grad():
            for (name, param) in model.named_parameters():
                # Per-element gaussian noise
                if gauss_prob > 0.0:
                    mask_g = (torch.rand_like(param) < gauss_prob)
                    # Scale by parameter std for scale-aware mutation
                    p_std = param.detach().std()
                    scale = gauss_scale * (p_std + 1e-8)
                    noise = torch.randn_like(param) * scale
                    param.add_(noise * mask_g)

                # Per-element random reset to fresh initialization
                if reset_prob > 0.0:
                    mask_r = (torch.rand_like(param) < reset_prob)
                    if mask_r.any():
                        fresh_vals = fresh_template_state[name]
                        param[mask_r] = fresh_vals[mask_r]

                # Occasional layer-wise full reset
                if torch.rand((), device=device).item() < layer_reset_rate:
                    param.copy_(fresh_template_state[name])

                # Occasional layer-wise gentle noise across entire tensor
                if torch.rand((), device=device).item() < layer_noise_rate:
                    p_std = param.detach().std()
                    scale = (0.5 * gauss_scale) * (p_std + 1e-8)
                    param.add_(torch.randn_like(param) * scale)
        return model

    # --- Build next population ---
    new_population = []

    # Sort individuals by fitness (descending)
    sorted_indices = torch.argsort(scores, descending=True)

    # Elitism: clone best individuals; gently jitter all but the top-1 elite to avoid premature convergence
    elites = []
    for rank in range(num_elite):
        elite_model = clone_model_from(current_population[int(sorted_indices[rank].item())])
        elites.append(elite_model)

    # Jitter elites except best one (if more than 1 elite)
    if len(elites) > 1:
        with torch.no_grad():
            for e in elites[1:]:
                for p in e.parameters():
                    p_std = p.detach().std()
                    if torch.is_floating_point(p) and p_std > 0:
                        e_scale = 0.02 * (p_std + 1e-8)
                        p.add_(torch.randn_like(p) * e_scale)

    new_population.extend(elites)

    # Immigrants: inject fresh random individuals to preserve diversity
    for _ in range(num_immigrants):
        immigrant = SimpleNet().to(device)
        new_population.append(immigrant)

    # Children: fill the rest with selection + crossover + mutation
    def distinct_parents():
        p1 = select_parent(current_population, scores)
        # Try to pick a different parent; fall back after a few tries
        max_tries = 5
        for _ in range(max_tries):
            p2 = select_parent(current_population, scores)
            if p2 is not p1:
                return p1, p2
        return p1, p2  # may be same if population small

    while len(new_population) < POP_SIZE:
        parent1, parent2 = distinct_parents()
        child = crossover(parent1, parent2)
        child = mutate(child)
        new_population.append(child)

    # In case rounding led to overfill (shouldn't happen, but safe-guard)
    if len(new_population) > POP_SIZE:
        new_population = new_population[:POP_SIZE]

    return new_population
```

Starting 3 detailed evaluation runs for this operator...
  > Run 1/3...
    Run 1 finished. Final test accuracy of best model: 15.80%
  > Run 2/3...
    Run 2 finished. Final test accuracy of best model: 9.57%
  > Run 3/3...
    Run 3 finished. Final test accuracy of best model: 10.36%
Finished evaluation. Operator 2 primary fitness: -11.4440

--- Evaluating Operator Individual 3/6 ---


```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    '''
    Holistic evolutionary operator with adaptive selection pressure, crossover, and mutation.
    - Rank-based selection with adaptive pressure.
    - Mixed crossover: structured mask and arithmetic extrapolation (BLX-inspired).
    - Adaptive mutation combining Gaussian noise, occasional resets, and tensor-level scaling.
    - Elitism with safe cloning and optional gentle jitter when stagnating.
    '''
    POP_SIZE = len(current_population)
    if POP_SIZE == 0:
        return []

    # --- Metrics for adaptive behavior ---
    scores = fitness_scores.detach()
    max_s = torch.max(scores)
    min_s = torch.min(scores)
    std_s = torch.std(scores)
    rng = (max_s - min_s).abs().item() + 1e-12
    spread_frac = (std_s.item() / rng)  # ~0..0.5 typical

    stagnating = spread_frac < 0.15
    unstable = spread_frac > 0.35

    # --- Hyperparameters (adaptive) ---
    if stagnating:
        ELITISM_RATE = 0.10
        SEL_PRESSURE = 2.0   # lower pressure -> more exploration
        MUT_PROB = 0.008
        GAUSS_SCALE = 0.20
        RESET_PROB = 0.0010
        GAMMA_EXTRAP = 0.35
        TENSOR_SCALE_PROB = 0.030
        ELITE_JITTER = True
        ELITE_JITTER_NOISE = 0.02
        ELITE_JITTER_MUT_PROB = 0.001
    elif unstable:
        ELITISM_RATE = 0.14
        SEL_PRESSURE = 7.5   # higher pressure -> exploit
        MUT_PROB = 0.003
        GAUSS_SCALE = 0.08
        RESET_PROB = 0.00010
        GAMMA_EXTRAP = 0.12
        TENSOR_SCALE_PROB = 0.010
        ELITE_JITTER = False
        ELITE_JITTER_NOISE = 0.0
        ELITE_JITTER_MUT_PROB = 0.0
    else:
        ELITISM_RATE = 0.10
        SEL_PRESSURE = 5.0
        MUT_PROB = 0.004
        GAUSS_SCALE = 0.12
        RESET_PROB = 0.00030
        GAMMA_EXTRAP = 0.18
        TENSOR_SCALE_PROB = 0.015
        ELITE_JITTER = False
        ELITE_JITTER_NOISE = 0.0
        ELITE_JITTER_MUT_PROB = 0.0

    num_elite = max(1, int(POP_SIZE * ELITISM_RATE)) if POP_SIZE > 1 else 1

    # --- Helper: clone model safely ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    # --- Rank-based selection probabilities with adaptive pressure ---
    sorted_idx = torch.argsort(scores, descending=True)
    ranks = torch.empty_like(sorted_idx, dtype=torch.float, device=scores.device)
    ranks[sorted_idx] = torch.arange(POP_SIZE, device=scores.device, dtype=torch.float)
    denom = max(1, POP_SIZE - 1)
    weights = torch.exp(-SEL_PRESSURE * (ranks / denom))
    probs = weights / (weights.sum() + 1e-12)

    def select_parent_index():
        return int(torch.multinomial(probs, num_samples=1, replacement=True).item())

    # --- Crossover operator (structured mask + arithmetic extrapolation) ---
    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        p1 = parent1.state_dict()
        p2 = parent2.state_dict()
        cdict = child.state_dict()
        with torch.no_grad():
            for k in cdict.keys():
                t1 = p1[k].to(device)
                t2 = p2[k].to(device)
                # Randomly choose crossover mode per tensor
                # 50% structured mask, 50% arithmetic extrapolation
                if torch.rand(()) < 0.5:
                    # Structured mask: broadcast mask across last dim for stability when tensor is matrix-like
                    if t1.dim() >= 2:
                        mask_shape = list(t1.shape)
                        mask_shape[-1] = 1
                        mask = (torch.rand(mask_shape, device=device) < 0.5).expand_as(t1)
                    else:
                        mask = (torch.rand_like(t1) < 0.5)
                    cdict[k] = torch.where(mask, t1, t2)
                else:
                    # Arithmetic extrapolation: beta in [-gamma, 1+gamma]
                    beta = torch.empty_like(t1).uniform_(-GAMMA_EXTRAP, 1.0 + GAMMA_EXTRAP)
                    cdict[k] = beta * t1 + (1.0 - beta) * t2
        child.load_state_dict(cdict)
        return child

    # --- Mutation operator (adaptive gaussian + reset + tensor scaling) ---
    def mutate(model, mut_prob=MUT_PROB, gauss_scale=GAUSS_SCALE, reset_prob=RESET_PROB, tensor_scale_prob=TENSOR_SCALE_PROB):
        with torch.no_grad():
            for p in model.parameters():
                if p.requires_grad:
                    # Gaussian mutation on a subset of weights
                    mask = torch.rand_like(p) < mut_prob
                    # Derive local std for scale-aware noise; fallback to 1.0 if near-constant
                    local_std = p.detach().std()
                    if not torch.isfinite(local_std) or local_std.item() < 1e-8:
                        local_std = torch.tensor(1.0, device=p.device)
                    noise = torch.randn_like(p) * (gauss_scale * local_std)
                    p[mask] = p[mask] + noise[mask]

                    # Occasional reset mutation
                    reset_mask = torch.rand_like(p) < reset_prob
                    if reset_mask.any():
                        p[reset_mask] = torch.randn_like(p[reset_mask]) * (0.5 * local_std + 1e-8)

                    # Tensor-level scaling mutation (structural)
                    if torch.rand(()) < tensor_scale_prob:
                        scale = 1.0 + (0.10 * torch.randn((), device=p.device))
                        p.mul_(scale)

        return model

    # --- Build next population ---
    new_population = []

    # Elitism: clone top individuals
    elites_idx = sorted_idx[:num_elite]
    for j, idx in enumerate(elites_idx):
        elite_clone = clone_model(current_population[int(idx)])
        # Optional gentle jitter for exploration when stagnating (except absolute best)
        if ELITE_JITTER and j > 0 and POP_SIZE > 2:
            mutate(elite_clone, mut_prob=ELITE_JITTER_MUT_PROB, gauss_scale=ELITE_JITTER_NOISE, reset_prob=0.0, tensor_scale_prob=0.0)
        new_population.append(elite_clone)

    # Generate offspring
    offspring_needed = POP_SIZE - len(new_population)
    for _ in range(offspring_needed):
        # Select parents (avoid identical parents when possible)
        p1_idx = select_parent_index()
        p2_idx = select_parent_index()
        tries = 0
        while p2_idx == p1_idx and tries < 3 and POP_SIZE > 1:
            p2_idx = select_parent_index()
            tries += 1

        parent1 = current_population[p1_idx]
        parent2 = current_population[p2_idx if POP_SIZE > 1 else p1_idx]

        child = crossover(parent1, parent2)
        child = mutate(child)
        new_population.append(child)

    # Safety: if population size drifted, trim or pad with clones
    if len(new_population) > POP_SIZE:
        new_population = new_population[:POP_SIZE]
    elif len(new_population) < POP_SIZE:
        # pad with clones of best
        best_model = current_population[int(sorted_idx[0])]
        while len(new_population) < POP_SIZE:
            new_population.append(clone_model(best_model))

    return new_population
```

Starting 3 detailed evaluation runs for this operator...
  > Run 1/3...
    Run 1 finished. Final test accuracy of best model: 12.09%
  > Run 2/3...
    Run 2 finished. Final test accuracy of best model: 13.81%
  > Run 3/3...
    Run 3 finished. Final test accuracy of best model: 17.94%
Finished evaluation. Operator 3 primary fitness: -11.4160

--- Evaluating Operator Individual 4/6 ---


```python
```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Evolves the population using a strategy that balances exploration and exploitation.

    This operator employs:
    - Elitism: Preserves the best-performing individuals.
    - Tournament Selection: Selects parents with a pressure towards higher fitness.
    - Fitness-Weighted Crossover: Creates offspring by blending parent weights,
      giving more influence to the fitter parent.
    - Gaussian Mutation: Applies small, random perturbations to weights, allowing
      for fine-tuning and exploration of the local search space.
    """
    POPULATION_SIZE = len(current_population)
    
    # --- Hyperparameters ---
    ELITISM_RATE = 0.10          # Preserve the top 10% of the population
    TOURNAMENT_SIZE = 5          # Size of the selection tournament
    MUTATION_RATE = 0.05         # Probability of a single weight being mutated
    MUTATION_STRENGTH = 0.1      # Standard deviation of the Gaussian noise for mutation

    # --- Helper: Fitness-Weighted Crossover ---
    def fitness_weighted_crossover(parent1, parent2, fitness1, fitness2, device):
        """
        Creates a child by performing a weighted average of the parents' weights.
        The weights are proportional to the parents' fitness scores.
        """
        child = SimpleNet().to(device)
        p1_dict = parent1.state_dict()
        p2_dict = parent2.state_dict()
        child_dict = child.state_dict()

        # Handle potential for zero or negative fitness
        total_fitness = fitness1 + fitness2
        if total_fitness <= 1e-6: # Avoid division by zero or negative weights
            w1, w2 = 0.5, 0.5
        else:
            w1 = fitness1 / total_fitness
            w2 = fitness2 / total_fitness

        with torch.no_grad():
            for key in p1_dict.keys():
                child_dict[key] = w1 * p1_dict[key] + w2 * p2_dict[key]
        
        child.load_state_dict(child_dict)
        return child

    # --- Helper: Gaussian Mutation ---
    def gaussian_mutate(model, device):
        """
        Applies additive Gaussian noise to a subset of the model's weights.
        """
        with torch.no_grad():
            for param in model.parameters():
                if len(param.shape) > 1: # Apply only to weight matrices/conv kernels
                    mask = torch.rand_like(param.data) < MUTATION_RATE
                    noise = torch.randn_like(param.data) * MUTATION_STRENGTH
                    param.data += noise * mask
        return model

    # --- Helper: Tournament Selection ---
    def tournament_selection(population, scores):
        """
        Selects an individual from the population using a tournament.
        Returns the winner and its index.
        """
        # Pick TOURNAMENT_SIZE random indices
        tournament_indices = torch.randint(0, len(population), (TOURNAMENT_SIZE,))
        
        # Find the index of the best individual within the tournament
        winner_local_idx = torch.argmax(scores[tournament_indices])
        winner_global_idx = tournament_indices[winner_local_idx]
        
        return population[winner_global_idx], winner_global_idx

    # --- Main Generation Logic ---
    
    # 1. Elitism: Carry over the best individuals to the next generation
    sorted_indices = torch.argsort(fitness_scores, descending=True)
    num_elite = int(POPULATION_SIZE * ELITISM_RATE)
    new_population = [current_population[i] for i in sorted_indices[:num_elite]]

    # 2. Crossover and Mutation: Create the rest of the new population
    num_children_to_create = POPULATION_SIZE - num_elite
    for _ in range(num_children_to_create):
        # Select two distinct parents
        parent1, p1_idx = tournament_selection(current_population, fitness_scores)
        parent2, p2_idx = tournament_selection(current_population, fitness_scores)
        while p1_idx == p2_idx:
            parent2, p2_idx = tournament_selection(current_population, fitness_scores)
            
        p1_fitness = fitness_scores[p1_idx]
        p2_fitness = fitness_scores[p2_idx]

        # Create child through crossover
        child = fitness_weighted_crossover(parent1, parent2, p1_fitness, p2_fitness, device)
        
        # Mutate the child
        child = gaussian_mutate(child, device)
        
        new_population.append(child)

    return new_population
```
```

Error executing evolved code. Attempting LLM repair (1/1).
--- Calling Google API to repair operator ---
Error executing evolved code after repair attempts: invalid syntax (<string>, line 1)
Finished evaluation. Operator 4 failed and was assigned a fitness of -inf.

--- Evaluating Operator Individual 5/6 ---


```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Holistic evolutionary operator with adaptive diversity control.
    - Elitism with cloning
    - Adaptive tournament size
    - Dynamic crossover strategy (Uniform / BLX-alpha)
    - Adaptive Gaussian and reset mutation
    - Immigrants injection when diversity is low
    """
    POP_SIZE = len(current_population)

    # --- Prepare fitness tensor ---
    scores = fitness_scores
    if not torch.is_tensor(scores):
        scores = torch.tensor(scores, dtype=torch.float32, device=device)
    else:
        scores = scores.to(device=device, dtype=torch.float32)

    # Guard for degenerate populations
    if POP_SIZE == 0:
        return []

    # --- Diversity diagnostics (coefficient of variation) ---
    mean = torch.mean(scores)
    std = torch.std(scores)
    cv = std / (torch.abs(mean) + 1e-8)

    low_diversity = cv.item() < 0.05
    unstable = cv.item() > 0.5

    # --- Adaptive hyperparameters ---
    if low_diversity:
        ELITISM_RATE = 0.08
        TOURNAMENT_SIZE = 3
        GAUSS_MUT_PROB = 0.02
        GAUSS_SIGMA = 0.10
        RESET_MUT_PROB = 5e-4
        IMMIGRANTS_RATE = 0.05
        CROSSOVER_MODE = "blx"  # BLX-alpha crossover to expand search
        BLX_ALPHA = 0.20
    elif unstable:
        ELITISM_RATE = 0.07
        TOURNAMENT_SIZE = 7
        GAUSS_MUT_PROB = 0.006
        GAUSS_SIGMA = 0.03
        RESET_MUT_PROB = 1e-5
        IMMIGRANTS_RATE = 0.01
        CROSSOVER_MODE = "uniform"  # stabilize around good schemas
        BLX_ALPHA = 0.10
    else:
        ELITISM_RATE = 0.10
        TOURNAMENT_SIZE = 5
        GAUSS_MUT_PROB = 0.01
        GAUSS_SIGMA = 0.05
        RESET_MUT_PROB = 1e-4
        IMMIGRANTS_RATE = 0.02
        CROSSOVER_MODE = "mixed"  # combine uniform and arithmetic

    num_elite = max(1, int(POP_SIZE * ELITISM_RATE))
    num_immigrants = max(0, int(POP_SIZE * IMMIGRANTS_RATE))
    num_children = POP_SIZE - num_elite - num_immigrants

    # --- Utilities ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def blx_alpha_tensor(p1, p2, alpha):
        low = torch.minimum(p1, p2)
        high = torch.maximum(p1, p2)
        span = high - low
        minv = low - alpha * span
        maxv = high + alpha * span
        return minv + (maxv - minv) * torch.rand_like(p1)

    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        p1 = parent1.state_dict()
        p2 = parent2.state_dict()
        c = child.state_dict()

        with torch.no_grad():
            for k in c.keys():
                t1 = p1[k]
                t2 = p2[k]
                if CROSSOVER_MODE == "uniform":
                    mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                    c[k] = t1 * mask + t2 * (1.0 - mask)
                elif CROSSOVER_MODE == "blx":
                    c[k] = blx_alpha_tensor(t1, t2, BLX_ALPHA)
                else:  # mixed
                    # 50% uniform mask, 50% arithmetic blend with per-element alpha in [0.25, 0.75]
                    if torch.rand((), device=device) < 0.5:
                        mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                        c[k] = t1 * mask + t2 * (1.0 - mask)
                    else:
                        alpha = 0.25 + 0.5 * torch.rand_like(t1)
                        c[k] = alpha * t1 + (1.0 - alpha) * t2
        child.load_state_dict(c)
        return child

    def mutate(model, gauss_prob=GAUSS_MUT_PROB, gauss_sigma=GAUSS_SIGMA, reset_prob=RESET_MUT_PROB):
        with torch.no_grad():
            for p in model.parameters():
                if gauss_prob > 0.0:
                    gmask = torch.rand_like(p) < gauss_prob
                    noise = torch.randn_like(p) * gauss_sigma
                    p[gmask] = p[gmask] + noise[gmask]
                if reset_prob > 0.0:
                    rmask = torch.rand_like(p) < reset_prob
                    new_vals = torch.randn_like(p)
                    p[rmask] = new_vals[rmask]
        return model

    def tournament_selection(population, scores_tensor, size=TOURNAMENT_SIZE):
        idxs = torch.randint(0, len(population), (size,), device=device)
        sub_scores = scores_tensor[idxs]
        winner_idx = int(idxs[torch.argmax(sub_scores)].item())
        return population[winner_idx]

    # --- Build next population ---
    sorted_idx = torch.argsort(scores, descending=True)
    elites = []
    for i in range(num_elite):
        elites.append(clone_model(current_population[int(sorted_idx[i].item())]))

    new_population = list(elites)

    # Immigrants (fresh random individuals) for diversity
    for _ in range(num_immigrants):
        new_population.append(SimpleNet().to(device))

    # Children via selection, crossover, mutation
    for _ in range(num_children):
        p1 = tournament_selection(current_population, scores)
        # ensure different parents where possible
        p2 = p1
        attempts = 0
        while p2 is p1 and attempts < 3:
            p2 = tournament_selection(current_population, scores)
            attempts += 1

        child = crossover(p1, p2)
        child = mutate(child)
        new_population.append(child)

    # Safety: if rounding errors occur, adjust size
    if len(new_population) > POP_SIZE:
        new_population = new_population[:POP_SIZE]
    elif len(new_population) < POP_SIZE:
        # Fill up with additional mutated clones of elites
        fill_needed = POP_SIZE - len(new_population)
        for i in range(fill_needed):
            extra = clone_model(current_population[int(sorted_idx[i % len(sorted_idx)].item())])
            new_population.append(mutate(extra))

    return new_population
```

Starting 3 detailed evaluation runs for this operator...
  > Run 1/3...
    Run 1 finished. Final test accuracy of best model: 10.52%
  > Run 2/3...
    Run 2 finished. Final test accuracy of best model: 16.90%
  > Run 3/3...
    Run 3 finished. Final test accuracy of best model: 15.96%
Finished evaluation. Operator 5 primary fitness: -11.3403

--- Evaluating Operator Individual 6/6 ---


```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Holistic evolutionary operator with adaptive exploration-exploitation balance.
    - Elitism (deep copies)
    - Adaptive tournament selection
    - Mixed crossover (uniform/arithmetic/whole-tensor)
    - Adaptive mutation (Gaussian + reset) with immigrant injection under low diversity/spread
    """
    POP_SIZE = len(current_population)
    if POP_SIZE == 0:
        return []

    # ----------------- Helpers -----------------
    def clone_model(src_model):
        m = SimpleNet().to(device)
        m.load_state_dict(src_model.state_dict())
        return m

    def flatten_vector(model, max_elems=50000):
        with torch.no_grad():
            vecs = []
            count = 0
            for p in model.parameters():
                t = p.detach().float().view(-1).cpu()
                if count + t.numel() <= max_elems:
                    vecs.append(t)
                    count += t.numel()
                else:
                    remaining = max(0, max_elems - count)
                    if remaining > 0:
                        vecs.append(t[:remaining])
                        count += remaining
                    break
            if len(vecs) == 0:
                return torch.zeros(1)
            return torch.cat(vecs)

    def estimate_diversity_and_spread(population, scores):
        # Diversity from top-K models using cosine similarity of flattened weights
        K = min(6, len(population))
        if K < 2:
            diversity = 0.0
        else:
            top_idx = torch.argsort(scores, descending=True)[:K]
            mats = []
            for idx in top_idx:
                v = flatten_vector(population[int(idx)])
                if v.norm() == 0:
                    v = v + 1e-6
                mats.append(v / (v.norm() + 1e-8))
            V = torch.stack(mats)  # K x D
            cos = (V @ V.t())
            # exclude diagonal
            avg_offdiag = (cos.sum() - torch.trace(cos)) / (K * (K - 1))
            avg_offdiag = torch.clamp(avg_offdiag, -1.0, 1.0)
            diversity = float(1.0 - avg_offdiag)
        # Spread of fitness
        s = scores.float()
        mean_abs = torch.mean(torch.abs(s)) + 1e-6
        std = torch.std(s)
        spread_ratio = float(std / mean_abs)
        return diversity, spread_ratio

    def dynamic_params(diversity, spread_ratio):
        # Targets
        target_diversity = 0.18
        target_spread = 0.08

        lack_div = max(0.0, (target_diversity - diversity) / max(1e-6, target_diversity))
        lack_spread = max(0.0, (target_spread - spread_ratio) / max(1e-6, target_spread))

        exploration_pressure = min(2.0, 0.5 * lack_spread + 0.5 * lack_div)  # [0, 2]
        # Mutation scaling factor
        mut_scale = 1.0 + 1.5 * exploration_pressure  # [1, 4]
        # Immigrant rate
        imm_rate = min(0.25, 0.04 * (1.0 + 2.0 * exploration_pressure))  # up to 25%
        # Tournament size (smaller -> more exploration when exploration_pressure high)
        t_min, t_max = 3, max(4, min(9, POP_SIZE // 4 + 3))
        tsize = int(round(t_max - (t_max - t_min) * min(1.0, 0.5 + 0.5 * diversity)))
        tsize = max(3, min(t_max, tsize))
        # Crossover probability adapts inversely to exploration (slightly)
        cx_prob = max(0.65, 0.9 - 0.15 * exploration_pressure)

        return mut_scale, imm_rate, tsize, cx_prob

    def tournament_selection(population, scores, tsize):
        idxs = torch.randint(low=0, high=len(population), size=(tsize,))
        best_local = idxs[torch.argmax(scores[idxs])]
        return population[int(best_local)]

    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        p1 = parent1.state_dict()
        p2 = parent2.state_dict()
        cdict = child.state_dict()
        with torch.no_grad():
            for k in cdict.keys():
                a = p1[k]
                b = p2[k]
                # Non-floating tensors (e.g., BatchNorm counters): take from one parent
                if not torch.is_floating_point(cdict[k]):
                    pick_first = bool(torch.randint(0, 2, (1,), device=device).item())
                    cdict[k] = a.clone() if pick_first else b.clone()
                    continue

                shape = a.shape
                r = torch.rand((), device=device)
                if r < 0.10:
                    # Whole-tensor pick
                    pick_first = bool(torch.randint(0, 2, (1,), device=device).item())
                    cdict[k] = a.clone() if pick_first else b.clone()
                elif r < 0.50:
                    # Element-wise uniform mask
                    mask = (torch.rand(shape, device=device) < 0.5).to(a.dtype)
                    cdict[k] = a * mask + b * (1.0 - mask)
                else:
                    # Arithmetic blend with random alpha per-tensor
                    alpha = torch.rand((), device=device)
                    cdict[k] = alpha * a + (1.0 - alpha) * b

                # Light crossover noise
                if torch.is_floating_point(cdict[k]):
                    # Scale noise by parameter scale
                    scale = torch.std(cdict[k].float())
                    noise = torch.randn_like(cdict[k]) * (0.01 * (float(scale) + 1e-6))
                    cdict[k].add_(noise)
        child.load_state_dict(cdict)
        return child

    def mutate(model, mut_scale=1.0):
        base_point_rate = 0.02
        base_sigma = 0.02
        base_reset_rate = 0.0005
        tensor_noise_prob = 0.02

        point_rate = min(0.5, base_point_rate * mut_scale)
        sigma = base_sigma * mut_scale
        reset_rate = min(0.1, base_reset_rate * mut_scale)

        with torch.no_grad():
            for p in model.parameters():
                if not torch.is_floating_point(p):
                    continue
                # Element-wise perturbation
                mask = torch.rand_like(p) < point_rate
                if mask.any():
                    scale = torch.std(p.float()) + 1e-6
                    noise = torch.randn_like(p) * (sigma * float(scale))
                    p[mask] = p[mask] + noise[mask]

                # Occasional whole-tensor small noise
                if torch.rand((), device=device) < tensor_noise_prob * min(2.0, mut_scale):
                    scale = torch.std(p.float()) + 1e-6
                    p.add_(torch.randn_like(p) * (0.5 * sigma * float(scale)))

                # Reset mutation
                rmask = torch.rand_like(p) < reset_rate
                if rmask.any():
                    scale = torch.std(p.float()) + 1e-6
                    new_weights = torch.randn_like(p) * float(scale)
                    p[rmask] = new_weights[rmask]
        return model

    # ----------------- Main -----------------
    scores = fitness_scores.detach().float()
    diversity, spread_ratio = estimate_diversity_and_spread(current_population, scores)
    mut_scale, imm_rate, TOURNAMENT_SIZE, CX_PROB = dynamic_params(diversity, spread_ratio)

    # Elitism
    ELITISM_RATE = 0.08
    num_elite = max(1, int(POP_SIZE * ELITISM_RATE))
    sorted_idx = torch.argsort(scores, descending=True)
    elites = [clone_model(current_population[int(i)]) for i in sorted_idx[:num_elite]]

    # Slightly perturb a subset of elites for soft exploration
    if num_elite > 1:
        elite_mutate_count = max(1, num_elite // 2)
        for i in range(elite_mutate_count):
            elites[i] = mutate(elites[i], mut_scale=0.5)

    new_population = []
    new_population.extend(elites)

    # Immigrants (random new individuals)
    num_immigrants = min(POP_SIZE - len(new_population), int(POP_SIZE * imm_rate))
    for _ in range(num_immigrants):
        m = SimpleNet().to(device)
        # Give immigrants a stronger initial mutation to diversify
        m = mutate(m, mut_scale=1.5 * mut_scale)
        new_population.append(m)

    # Fill the rest via selection, crossover, mutation
    while len(new_population) < POP_SIZE:
        parent1 = tournament_selection(current_population, scores, TOURNAMENT_SIZE)
        # Select second parent; encourage diversity by sampling a few and picking the most different
        candidates = [tournament_selection(current_population, scores, TOURNAMENT_SIZE) for _ in range(3)]
        # Choose most different from parent1 among candidates by comparing a small random projection
        p1_vec = None
        best_cand = candidates[0]
        best_score = -1.0
        for cand in candidates:
            # quick proxy: compare norms difference of a single parameter tensor
            with torch.no_grad():
                p1p = next(parent1.parameters()).detach().float().view(-1)
                p2p = next(cand.parameters()).detach().float().view(-1)
                ln = min(p1p.numel(), p2p.numel(), 1024)
                d = torch.norm(p1p[:ln] - p2p[:ln]) / (torch.norm(p1p[:ln]) + 1e-6)
                val = float(d)
                if val > best_score:
                    best_score = val
                    best_cand = cand
        parent2 = best_cand

        do_crossover = torch.rand((), device=device) < CX_PROB
        if bool(do_crossover.item()):
            child = crossover(parent1, parent2)
        else:
            child = clone_model(parent1)
        child = mutate(child, mut_scale=mut_scale)
        new_population.append(child)

    # Ensure exact population size
    if len(new_population) > POP_SIZE:
        new_population = new_population[:POP_SIZE]

    return new_population
```

Starting 3 detailed evaluation runs for this operator...
  > Run 1/3...
    Run 1 finished. Final test accuracy of best model: 9.62%
  > Run 2/3...
    Run 2 finished. Final test accuracy of best model: 15.64%
  > Run 3/3...
    Run 3 finished. Final test accuracy of best model: 17.98%
Finished evaluation. Operator 6 primary fitness: -11.4327

--- Meta-Generation 1 Results ---
Best Operator Fitness (Avg Final Best): -11.3403
Best Performing Operator's Code:


```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Holistic evolutionary operator with adaptive diversity control.
    - Elitism with cloning
    - Adaptive tournament size
    - Dynamic crossover strategy (Uniform / BLX-alpha)
    - Adaptive Gaussian and reset mutation
    - Immigrants injection when diversity is low
    """
    POP_SIZE = len(current_population)

    # --- Prepare fitness tensor ---
    scores = fitness_scores
    if not torch.is_tensor(scores):
        scores = torch.tensor(scores, dtype=torch.float32, device=device)
    else:
        scores = scores.to(device=device, dtype=torch.float32)

    # Guard for degenerate populations
    if POP_SIZE == 0:
        return []

    # --- Diversity diagnostics (coefficient of variation) ---
    mean = torch.mean(scores)
    std = torch.std(scores)
    cv = std / (torch.abs(mean) + 1e-8)

    low_diversity = cv.item() < 0.05
    unstable = cv.item() > 0.5

    # --- Adaptive hyperparameters ---
    if low_diversity:
        ELITISM_RATE = 0.08
        TOURNAMENT_SIZE = 3
        GAUSS_MUT_PROB = 0.02
        GAUSS_SIGMA = 0.10
        RESET_MUT_PROB = 5e-4
        IMMIGRANTS_RATE = 0.05
        CROSSOVER_MODE = "blx"  # BLX-alpha crossover to expand search
        BLX_ALPHA = 0.20
    elif unstable:
        ELITISM_RATE = 0.07
        TOURNAMENT_SIZE = 7
        GAUSS_MUT_PROB = 0.006
        GAUSS_SIGMA = 0.03
        RESET_MUT_PROB = 1e-5
        IMMIGRANTS_RATE = 0.01
        CROSSOVER_MODE = "uniform"  # stabilize around good schemas
        BLX_ALPHA = 0.10
    else:
        ELITISM_RATE = 0.10
        TOURNAMENT_SIZE = 5
        GAUSS_MUT_PROB = 0.01
        GAUSS_SIGMA = 0.05
        RESET_MUT_PROB = 1e-4
        IMMIGRANTS_RATE = 0.02
        CROSSOVER_MODE = "mixed"  # combine uniform and arithmetic

    num_elite = max(1, int(POP_SIZE * ELITISM_RATE))
    num_immigrants = max(0, int(POP_SIZE * IMMIGRANTS_RATE))
    num_children = POP_SIZE - num_elite - num_immigrants

    # --- Utilities ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def blx_alpha_tensor(p1, p2, alpha):
        low = torch.minimum(p1, p2)
        high = torch.maximum(p1, p2)
        span = high - low
        minv = low - alpha * span
        maxv = high + alpha * span
        return minv + (maxv - minv) * torch.rand_like(p1)

    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        p1 = parent1.state_dict()
        p2 = parent2.state_dict()
        c = child.state_dict()

        with torch.no_grad():
            for k in c.keys():
                t1 = p1[k]
                t2 = p2[k]
                if CROSSOVER_MODE == "uniform":
                    mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                    c[k] = t1 * mask + t2 * (1.0 - mask)
                elif CROSSOVER_MODE == "blx":
                    c[k] = blx_alpha_tensor(t1, t2, BLX_ALPHA)
                else:  # mixed
                    # 50% uniform mask, 50% arithmetic blend with per-element alpha in [0.25, 0.75]
                    if torch.rand((), device=device) < 0.5:
                        mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                        c[k] = t1 * mask + t2 * (1.0 - mask)
                    else:
                        alpha = 0.25 + 0.5 * torch.rand_like(t1)
                        c[k] = alpha * t1 + (1.0 - alpha) * t2
        child.load_state_dict(c)
        return child

    def mutate(model, gauss_prob=GAUSS_MUT_PROB, gauss_sigma=GAUSS_SIGMA, reset_prob=RESET_MUT_PROB):
        with torch.no_grad():
            for p in model.parameters():
                if gauss_prob > 0.0:
                    gmask = torch.rand_like(p) < gauss_prob
                    noise = torch.randn_like(p) * gauss_sigma
                    p[gmask] = p[gmask] + noise[gmask]
                if reset_prob > 0.0:
                    rmask = torch.rand_like(p) < reset_prob
                    new_vals = torch.randn_like(p)
                    p[rmask] = new_vals[rmask]
        return model

    def tournament_selection(population, scores_tensor, size=TOURNAMENT_SIZE):
        idxs = torch.randint(0, len(population), (size,), device=device)
        sub_scores = scores_tensor[idxs]
        winner_idx = int(idxs[torch.argmax(sub_scores)].item())
        return population[winner_idx]

    # --- Build next population ---
    sorted_idx = torch.argsort(scores, descending=True)
    elites = []
    for i in range(num_elite):
        elites.append(clone_model(current_population[int(sorted_idx[i].item())]))

    new_population = list(elites)

    # Immigrants (fresh random individuals) for diversity
    for _ in range(num_immigrants):
        new_population.append(SimpleNet().to(device))

    # Children via selection, crossover, mutation
    for _ in range(num_children):
        p1 = tournament_selection(current_population, scores)
        # ensure different parents where possible
        p2 = p1
        attempts = 0
        while p2 is p1 and attempts < 3:
            p2 = tournament_selection(current_population, scores)
            attempts += 1

        child = crossover(p1, p2)
        child = mutate(child)
        new_population.append(child)

    # Safety: if rounding errors occur, adjust size
    if len(new_population) > POP_SIZE:
        new_population = new_population[:POP_SIZE]
    elif len(new_population) < POP_SIZE:
        # Fill up with additional mutated clones of elites
        fill_needed = POP_SIZE - len(new_population)
        for i in range(fill_needed):
            extra = clone_model(current_population[int(sorted_idx[i % len(sorted_idx)].item())])
            new_population.append(mutate(extra))

    return new_population
```

Best Operator's Performance Summary:


This operator's performance has been recorded over **3** separate runs.
The table below shows the fitness dynamics, **averaged across all runs**.
Fitness is based on the negative loss on training batches (higher is better).

| Gen | Best Fitness | Avg Fitness  | Worst Fitness | Spread (Diversity) |
|:---:|:------------:|:------------:|:-------------:|:------------------:|
|  0  |     -11.4630 |     -11.5397 |      -11.6121 |             0.1491 |
|  1  |     -11.4313 |     -11.5767 |      -12.1839 |             0.7526 |
|  2  |     -11.4298 |     -11.5916 |      -11.9507 |             0.5209 |
|  3  |     -11.3812 |     -11.6047 |      -11.9569 |             0.5757 |
|  4  |     -11.3890 |     -11.6302 |      -12.2664 |             0.8774 |
|  5  |     -11.3751 |     -11.6719 |      -13.6228 |             2.2477 |
|  6  |     -11.3837 |     -11.6159 |      -12.1176 |             0.7339 |
|  7  |     -11.3677 |     -11.6666 |      -12.3902 |             1.0226 |
|  8  |     -11.3426 |     -11.6645 |      -12.4404 |             1.0978 |
|  9  |     -11.3403 |     -11.6956 |      -13.2220 |             1.8817 |

**Analysis Hints for Your Evolution:**
- **Rate of Improvement:** Analyze the slope of the `Best Fitness` column. A steep, consistent increase is ideal.
- **Population Diversity:** The `Spread (Best-Worst)` column is a proxy for diversity. If it collapses to near-zero too quickly, the population has prematurely converged, and you should consider changes that increase exploration (e.g., higher mutation, different selection).
- **Stability:** Smooth, predictable improvements indicate a stable operator. Jagged or erratic values might suggest the operator is too chaotic.

--- Calling Google API to evolve operator (with generational feedback) ---
--- Calling Openai API to evolve operator (with generational feedback) ---
--- Calling Openai API to evolve operator (with generational feedback) ---
--- Calling Openai API to evolve operator (with generational feedback) ---

Evolving the `generate_next_population` operator...

--- Evaluating Operator Individual 2/6 ---


```python
```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Holistic evolutionary operator evolved for stable improvement.
    - Increased elitism to preserve high-performing individuals.
    - Adaptive tournament size with slightly reduced pressure in unstable states.
    - Refined crossover: standard arithmetic blend for exploitation, BLX for exploration.
    - Tuned mutation: reduced destructive reset mutations to improve average fitness.
    - Strategic immigrant injection, paused during unstable phases to aid convergence.
    """
    POP_SIZE = len(current_population)

    # --- Prepare fitness tensor ---
    scores = fitness_scores
    if not torch.is_tensor(scores):
        scores = torch.tensor(scores, dtype=torch.float32, device=device)
    else:
        scores = scores.to(device=device, dtype=torch.float32)

    # Guard for degenerate populations
    if POP_SIZE == 0:
        return []

    # --- Diversity diagnostics (coefficient of variation) ---
    mean = torch.mean(scores)
    std = torch.std(scores)
    cv = std / (torch.abs(mean) + 1e-8)

    low_diversity = cv.item() < 0.05
    unstable = cv.item() > 0.4 # Slightly more sensitive to instability

    # --- Adaptive hyperparameters (Evolved) ---
    if low_diversity:
        # Increase exploration and diversity
        ELITISM_RATE = 0.10
        TOURNAMENT_SIZE = 3
        GAUSS_MUT_PROB = 0.02
        GAUSS_SIGMA = 0.10
        RESET_MUT_PROB = 2e-4  # Reduced destructive mutation
        IMMIGRANTS_RATE = 0.05
        CROSSOVER_MODE = "blx"
        BLX_ALPHA = 0.15       # Reduced BLX range
    elif unstable:
        # Stabilize population, increase exploitation
        ELITISM_RATE = 0.12      # Higher elitism to lock in gains
        TOURNAMENT_SIZE = 6      # Reduced pressure
        GAUSS_MUT_PROB = 0.005
        GAUSS_SIGMA = 0.03
        RESET_MUT_PROB = 1e-5
        IMMIGRANTS_RATE = 0.00   # No random immigrants when unstable
        CROSSOVER_MODE = "uniform"
    else: # Balanced state
        ELITISM_RATE = 0.12
        TOURNAMENT_SIZE = 5
        GAUSS_MUT_PROB = 0.015   # Increased local search
        GAUSS_SIGMA = 0.05
        RESET_MUT_PROB = 5e-5    # Reduced destructive mutation
        IMMIGRANTS_RATE = 0.02
        CROSSOVER_MODE = "arithmetic_blend"

    num_elite = max(1, int(POP_SIZE * ELITISM_RATE))
    num_immigrants = max(0, int(POP_SIZE * IMMIGRANTS_RATE))
    num_children = POP_SIZE - num_elite - num_immigrants

    # --- Utilities ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def blx_alpha_tensor(p1, p2, alpha):
        low = torch.minimum(p1, p2)
        high = torch.maximum(p1, p2)
        span = high - low
        minv = low - alpha * span
        maxv = high + alpha * span
        return minv + (maxv - minv) * torch.rand_like(p1)

    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        p1_sd = parent1.state_dict()
        p2_sd = parent2.state_dict()
        child_sd = child.state_dict()

        with torch.no_grad():
            for k in child_sd.keys():
                t1 = p1_sd[k]
                t2 = p2_sd[k]
                if CROSSOVER_MODE == "uniform":
                    mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                    child_sd[k] = t1 * mask + t2 * (1.0 - mask)
                elif CROSSOVER_MODE == "blx":
                    child_sd[k] = blx_alpha_tensor(t1, t2, BLX_ALPHA)
                else:  # arithmetic_blend
                    alpha = torch.rand((), device=device) # Uniform random blend factor
                    child_sd[k] = alpha * t1 + (1.0 - alpha) * t2
        child.load_state_dict(child_sd)
        return child

    def mutate(model, gauss_prob=GAUSS_MUT_PROB, gauss_sigma=GAUSS_SIGMA, reset_prob=RESET_MUT_PROB):
        with torch.no_grad():
            for p in model.parameters():
                if gauss_prob > 0.0:
                    gmask = torch.rand_like(p) < gauss_prob
                    noise = torch.randn_like(p) * gauss_sigma
                    p.add_(noise * gmask)
                if reset_prob > 0.0:
                    rmask = torch.rand_like(p) < reset_prob
                    p[rmask] = torch.randn_like(p[rmask])
        return model

    def tournament_selection(population, scores_tensor, size=TOURNAMENT_SIZE):
        idxs = torch.randint(0, len(population), (size,), device=device)
        sub_scores = scores_tensor[idxs]
        winner_idx = idxs[torch.argmax(sub_scores)].item()
        return population[winner_idx]

    # --- Build next population ---
    sorted_idx = torch.argsort(scores, descending=True)
    elites = [clone_model(current_population[i.item()]) for i in sorted_idx[:num_elite]]

    new_population = list(elites)

    # Immigrants (fresh random individuals) for diversity
    for _ in range(num_immigrants):
        new_population.append(SimpleNet().to(device))

    # Children via selection, crossover, mutation
    for _ in range(num_children):
        parent1 = tournament_selection(current_population, scores)
        parent2 = parent1
        attempts = 0
        while parent2 is parent1 and attempts < 5:
            parent2 = tournament_selection(current_population, scores)
            attempts += 1

        child = crossover(parent1, parent2)
        child = mutate(child)
        new_population.append(child)

    # Safety: if rounding errors occur, adjust size to match POP_SIZE
    while len(new_population) < POP_SIZE:
        # Fill shortage with mutated clones of the best individuals
        filler_parent = elites[len(new_population) % len(elites)]
        new_individual = mutate(clone_model(filler_parent))
        new_population.append(new_individual)

    return new_population[:POP_SIZE]
```
```

Error executing evolved code. Attempting LLM repair (1/1).
--- Calling Google API to repair operator ---
Error executing evolved code after repair attempts: invalid syntax (<string>, line 1)
Finished evaluation. Operator 2 failed and was assigned a fitness of -inf.

--- Evaluating Operator Individual 3/6 ---


```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Holistic evolutionary operator with rank-based selection, bias-preserving crossover,
    and fitness-adaptive mutation to increase exploitation stability without collapsing diversity.
    """
    POP_SIZE = len(current_population)
    if POP_SIZE == 0:
        return []
    if POP_SIZE == 1:
        # Keep population size; apply light mutation to explore a bit
        lone = SimpleNet().to(device)
        lone.load_state_dict(current_population[0].state_dict())
        with torch.no_grad():
            for p in lone.parameters():
                std = torch.std(p)
                scale = (std if torch.isfinite(std) and std > 0 else torch.tensor(1.0, device=device))
                mask = torch.rand_like(p) < 0.01
                p[mask] = p[mask] + torch.randn_like(p[mask]) * 0.03 * scale
        return [lone]

    # Prepare fitness tensor
    if not torch.is_tensor(fitness_scores):
        scores = torch.tensor(fitness_scores, dtype=torch.float32, device=device)
    else:
        scores = fitness_scores.to(device=device, dtype=torch.float32)

    # Diversity diagnostics
    mean = torch.mean(scores)
    std = torch.std(scores)
    cv = std / (torch.abs(mean) + 1e-8)
    cv_val = float(cv.item())

    low_diversity = cv_val < 0.08
    high_diversity = cv_val > 0.30

    # Adaptive hyperparameters (tuned for more stable improvement)
    if high_diversity:
        ELITISM_RATE = 0.18
        GAUSS_MUT_PROB = 0.004
        GAUSS_SIGMA = 0.02
        RESET_MUT_PROB = 0.0
        IMMIGRANTS_RATE = 0.0
        CROSSOVER_MODE = "biased"
        CROSSOVER_RATE = 0.90
        SELECTION_Q = 0.22  # stronger pressure toward top
        BLX_ALPHA = 0.10
        BASE_BIAS = 0.64
    elif low_diversity:
        ELITISM_RATE = 0.08
        GAUSS_MUT_PROB = 0.015
        GAUSS_SIGMA = 0.08
        RESET_MUT_PROB = 3e-4
        IMMIGRANTS_RATE = 0.04
        CROSSOVER_MODE = "explore"
        CROSSOVER_RATE = 0.98
        SELECTION_Q = 0.35  # flatter selection to widen search
        BLX_ALPHA = 0.25
        BASE_BIAS = 0.56
    else:
        ELITISM_RATE = 0.14
        GAUSS_MUT_PROB = 0.007
        GAUSS_SIGMA = 0.035
        RESET_MUT_PROB = 5e-5
        IMMIGRANTS_RATE = 0.01
        CROSSOVER_MODE = "mixed"
        CROSSOVER_RATE = 0.93
        SELECTION_Q = 0.28
        BLX_ALPHA = 0.15
        BASE_BIAS = 0.60

    num_elite = max(1, int(POP_SIZE * ELITISM_RATE))
    num_immigrants = max(0, int(POP_SIZE * IMMIGRANTS_RATE))
    num_children = POP_SIZE - num_elite - num_immigrants

    # Utilities
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def blx_alpha_tensor(p1, p2, alpha):
        low = torch.minimum(p1, p2)
        high = torch.maximum(p1, p2)
        span = high - low
        minv = low - alpha * span
        maxv = high + alpha * span
        return minv + (maxv - minv) * torch.rand_like(p1)

    def biased_arithmetic_mix(t1, t2, better_first=True, base_bias=0.6, jitter=0.1):
        # Per-element alpha around base_bias with small jitter
        alpha = base_bias + (torch.rand_like(t1) - 0.5) * 2.0 * jitter
        alpha = torch.clamp(alpha, 0.4, 0.9)
        if better_first:
            return alpha * t1 + (1.0 - alpha) * t2
        else:
            return alpha * t2 + (1.0 - alpha) * t1

    def crossover(parent1, parent2, s1, s2):
        child = SimpleNet().to(device)
        p1 = parent1.state_dict()
        p2 = parent2.state_dict()
        c = child.state_dict()
        better_is_p1 = bool(s1 >= s2)

        with torch.no_grad():
            for k in c.keys():
                t1 = p1[k]
                t2 = p2[k]
                if torch.rand((), device=device) > CROSSOVER_RATE:
                    # No crossover: take better parent weights directly
                    c[k] = t1.clone() if better_is_p1 else t2.clone()
                    continue

                if CROSSOVER_MODE == "biased":
                    # Bias towards better parent with mild jitter
                    c[k] = biased_arithmetic_mix(t1, t2, better_first=better_is_p1, base_bias=BASE_BIAS, jitter=0.08)
                elif CROSSOVER_MODE == "explore":
                    # Mostly BLX for exploration, sometimes biased arithmetic
                    if torch.rand((), device=device) < 0.7:
                        c[k] = blx_alpha_tensor(t1, t2, BLX_ALPHA)
                    else:
                        c[k] = biased_arithmetic_mix(t1, t2, better_first=better_is_p1, base_bias=0.55, jitter=0.15)
                else:  # "mixed"
                    # Blend: 60% biased arithmetic, 40% BLX
                    if torch.rand((), device=device) < 0.6:
                        c[k] = biased_arithmetic_mix(t1, t2, better_first=better_is_p1, base_bias=BASE_BIAS, jitter=0.10)
                    else:
                        c[k] = blx_alpha_tensor(t1, t2, BLX_ALPHA)
        child.load_state_dict(c)
        return child

    def mutate(model, gauss_prob, gauss_sigma, reset_prob, intensity=1.0):
        # Fitness-adaptive mutation: scale by intensity; scale noise by parameter std
        flip_base = 1e-4  # extremely small sign-flip probability
        gprob = min(0.5, gauss_prob * intensity)
        rsprob = min(0.05, reset_prob * (0.5 + 0.5 * intensity))
        flip_prob = min(0.01, flip_base * intensity)

        with torch.no_grad():
            for p in model.parameters():
                p_std = torch.std(p)
                scale = (p_std if torch.isfinite(p_std) and p_std > 0 else torch.tensor(1.0, device=p.device))
                # Gaussian perturbation
                if gprob > 0.0:
                    gmask = torch.rand_like(p) < gprob
                    if gmask.any():
                        noise = torch.randn_like(p) * (gauss_sigma * (0.5 + 0.75 * intensity)) * scale
                        p[gmask] = p[gmask] + noise[gmask]
                # Occasional reset
                if rsprob > 0.0:
                    rmask = torch.rand_like(p) < rsprob
                    if rmask.any():
                        p[rmask] = torch.randn_like(p[rmask]) * scale
                # Rare sign flip to escape symmetry
                if flip_prob > 0.0:
                    fmask = torch.rand_like(p) < flip_prob
                    if fmask.any():
                        p[fmask] = -p[fmask]
        return model

    # Rank-based selection probabilities (exponential ranking)
    sorted_idx = torch.argsort(scores, descending=True)
    ranks = torch.empty(POP_SIZE, device=device, dtype=torch.float32)
    ranks[sorted_idx] = torch.arange(POP_SIZE, device=device, dtype=torch.float32)
    # Lower rank -> higher probability
    weights = (1.0 - SELECTION_Q) ** ranks
    weights = weights / (weights.sum() + 1e-12)

    def select_index():
        # Sample with replacement
        idx = torch.multinomial(weights, 1, replacement=True)
        return int(idx.item())

    # Build next population
    new_population = []

    # Elites
    for i in range(num_elite):
        elite_idx = int(sorted_idx[i].item())
        new_population.append(clone_model(current_population[elite_idx]))

    # Immigrants (minimal when diversity is already high)
    for _ in range(num_immigrants):
        new_population.append(SimpleNet().to(device))

    # Children
    for _ in range(num_children):
        i1 = select_index()
        i2 = select_index()
        # Try to ensure different parents when possible
        tries = 0
        while i2 == i1 and tries < 3:
            i2 = select_index()
            tries += 1

        p1 = current_population[i1]
        p2 = current_population[i2]
        s1 = scores[i1]
        s2 = scores[i2]

        # Crossover
        child = crossover(p1, p2, s1, s2)

        # Mutation intensity based on average rank (worse rank -> stronger mutation)
        r1 = ranks[i1] / (POP_SIZE - 1 + 1e-8)
        r2 = ranks[i2] / (POP_SIZE - 1 + 1e-8)
        r_avg = float(((r1 + r2) * 0.5).item())
        intensity = 0.6 + 1.6 * r_avg  # in [0.6, 2.2)

        # Mutate
        child = mutate(child, GAUSS_MUT_PROB, GAUSS_SIGMA, RESET_MUT_PROB, intensity=intensity)
        new_population.append(child)

    # Safety size adjustments
    if len(new_population) > POP_SIZE:
        new_population = new_population[:POP_SIZE]
    elif len(new_population) < POP_SIZE:
        fill_needed = POP_SIZE - len(new_population)
        for i in range(fill_needed):
            elite_idx = int(sorted_idx[i % num_elite].item())
            extra = clone_model(current_population[elite_idx])
            new_population.append(extra)

    return new_population
```

Starting 3 detailed evaluation runs for this operator...
  > Run 1/3...
    Run 1 finished. Final test accuracy of best model: 12.85%
  > Run 2/3...
    Run 2 finished. Final test accuracy of best model: 21.16%
  > Run 3/3...
    Run 3 finished. Final test accuracy of best model: 11.98%
Finished evaluation. Operator 3 primary fitness: -11.4180

--- Evaluating Operator Individual 5/6 ---


```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    '''
    Holistic evolutionary operator with adaptive mutation, soft crossover, elitism, tournament selection,
    and diversity-preserving random immigrants.
    '''
    POPULATION_SIZE = len(current_population)
    ELITISM_RATE = 0.05
    TOURNAMENT_SIZE = 3
    IMMIGRANT_RATE = 0.05

    # --- Diversity-adaptive mutation parameters (based on fitness spread) ---
    best = torch.max(fitness_scores).item()
    worst = torch.min(fitness_scores).item()
    spread = best - worst
    if spread < 0.10:
        NOISE_P = 0.06
        NOISE_STD_FACTOR = 0.25
        RESET_PROB = 0.001
    elif spread < 0.15:
        NOISE_P = 0.03
        NOISE_STD_FACTOR = 0.15
        RESET_PROB = 0.0005
    else:
        NOISE_P = 0.015
        NOISE_STD_FACTOR = 0.10
        RESET_PROB = 0.0002

    # --- Helpers ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def crossover(parent1, parent2):
        # Smooth, per-weight blend crossover with random mixing coefficients
        child = SimpleNet().to(device)
        p1 = parent1.state_dict()
        p2 = parent2.state_dict()
        cdict = child.state_dict()
        for k in p1.keys():
            t1 = p1[k]
            t2 = p2[k]
            if t1.dtype.is_floating_point:
                alpha = torch.rand_like(t1)
                cdict[k] = alpha * t1 + (1.0 - alpha) * t2
            else:
                # For non-float buffers (e.g., counters), inherit from a random parent
                take_from_p1 = torch.rand(()) < 0.5
                cdict[k] = t1 if take_from_p1 else t2
        child.load_state_dict(cdict)
        return child

    def mutate(model):
        with torch.no_grad():
            for p in model.parameters():
                if not p.requires_grad:
                    continue
                # Scale noise to parameter scale
                param_std = torch.std(p).item()
                scaled_std = NOISE_STD_FACTOR * (param_std if param_std > 0 else 1.0)
                mask = torch.rand_like(p) < NOISE_P
                noise = torch.randn_like(p) * scaled_std
                p.add_(noise * mask)
                if RESET_PROB > 0:
                    reset_mask = torch.rand_like(p) < RESET_PROB
                    p.data[reset_mask] = torch.randn_like(p)[reset_mask] * (param_std if param_std > 0 else 1.0)
        return model

    def tournament_selection_idx(scores):
        idxs = torch.randint(0, POPULATION_SIZE, (TOURNAMENT_SIZE,))
        winner_local = torch.argmax(scores[idxs])
        return int(idxs[winner_local].item())

    # --- Build next population ---
    sorted_indices = torch.argsort(fitness_scores, descending=True)
    num_elite = max(1, int(POPULATION_SIZE * ELITISM_RATE))
    num_immigrants = max(1, int(POPULATION_SIZE * IMMIGRANT_RATE))
    num_children_to_create = POPULATION_SIZE - num_elite - num_immigrants
    if num_children_to_create < 0:
        num_children_to_create = 0
        num_immigrants = POPULATION_SIZE - num_elite

    new_population = []
    # Elitism (deep-cloned to avoid accidental in-place modifications)
    for i in range(num_elite):
        elite = clone_model(current_population[int(sorted_indices[i].item())])
        new_population.append(elite)

    # Children via selection + crossover + mutation
    for _ in range(num_children_to_create):
        p1_idx = tournament_selection_idx(fitness_scores)
        p2_idx = tournament_selection_idx(fitness_scores)
        # Ensure two different parents when possible
        attempts = 0
        while p2_idx == p1_idx and attempts < 5 and POPULATION_SIZE > 1:
            p2_idx = tournament_selection_idx(fitness_scores)
            attempts += 1
        parent1 = current_population[p1_idx]
        parent2 = current_population[p2_idx]
        child = crossover(parent1, parent2)
        child = mutate(child)
        new_population.append(child)

    # Random immigrants to preserve exploration
    for _ in range(num_immigrants):
        immigrant = SimpleNet().to(device)
        new_population.append(immigrant)

    # Ensure population size (trim if any rounding issues)
    if len(new_population) > POPULATION_SIZE:
        new_population = new_population[:POPULATION_SIZE]
    elif len(new_population) < POPULATION_SIZE:
        # Fill missing with additional immigrants
        for _ in range(POPULATION_SIZE - len(new_population)):
            new_population.append(SimpleNet().to(device))

    return new_population
```

Starting 3 detailed evaluation runs for this operator...
  > Run 1/3...
    Run 1 finished. Final test accuracy of best model: 12.25%
  > Run 2/3...
    Run 2 finished. Final test accuracy of best model: 15.44%
  > Run 3/3...
    Run 3 finished. Final test accuracy of best model: 12.22%
Finished evaluation. Operator 5 primary fitness: -11.4744

--- Evaluating Operator Individual 6/6 ---


```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Improved GA operator with adaptive selection pressure, arithmetic crossover,
    and adaptive mixed mutation to preserve diversity while maintaining progress.
    """
    POPULATION_SIZE = len(current_population)
    ELITISM_RATE = 0.05

    # Compute diversity (fitness spread) and derive adaptive parameters
    scores_tensor = fitness_scores.detach()
    spread = (scores_tensor.max() - scores_tensor.min()).item() if len(scores_tensor) > 0 else 0.0
    # Diversity factor in [0,1]: higher when spread is low
    f = max(0.0, min(1.0, (0.15 - spread) / 0.15))

    # Adaptive selection pressure via tournament size
    TOURNAMENT_SIZE = max(3, int(round(5 - 2 * f)))  # 5 -> 3 as diversity drops

    # Adaptive mutation rates
    P_GAUSS = 0.002 + 0.010 * f     # per-weight gaussian mutation prob
    P_RESET = 0.0001 + 0.0009 * f   # per-weight reset prob
    P_REINIT_TENSOR = 0.002 * f     # per-tensor reinit prob
    MUT_INTENSITY = 1.0 + 0.5 * f   # noise scale multiplier

    # Random immigrants to inject exploration when diversity is low
    num_immigrants = int(POPULATION_SIZE * (0.05 * f))

    # --- Helpers ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def tournament_selection(population, scores_cpu, k):
        idx = torch.randint(0, len(population), (k,))
        winner_local = torch.argmax(scores_cpu[idx])
        return population[int(idx[winner_local].item())]

    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        p1 = parent1.state_dict()
        p2 = parent2.state_dict()
        cd = child.state_dict()
        for key in cd.keys():
            t1 = p1[key]
            t2 = p2[key]
            if t1.dtype.is_floating_point:
                # Arithmetic crossover with per-tensor alpha for stability
                alpha = torch.empty(1, device=device).uniform_(0.3, 0.7).item()
                mixed = t1 * alpha + t2 * (1.0 - alpha)
                # With small chance, patch in pieces from a parent to keep discrete structure
                if torch.rand((), device=device).item() < 0.1:
                    mask = torch.rand_like(mixed) < 0.5
                    mixed = torch.where(mask, t1, mixed)
                cd[key] = mixed
            else:
                cd[key] = t1.clone()
        child.load_state_dict(cd)
        return child

    def mutate(model, p_gauss, p_reset, p_reinit, intensity):
        with torch.no_grad():
            for name, param in model.named_parameters():
                if not param.requires_grad:
                    continue

                # Occasional tensor reinitialization (helps escape local minima)
                if torch.rand((), device=device).item() < p_reinit:
                    if param.dim() >= 2:
                        torch.nn.init.kaiming_normal_(param, nonlinearity='relu')
                    else:
                        param.zero_()
                    continue

                # Gaussian mutation on a subset of weights
                if p_gauss > 0.0:
                    mask = torch.rand_like(param) < p_gauss
                    if mask.any():
                        # Scale noise by parameter statistics
                        std = param.std().item()
                        if not (std > 0.0):
                            std = 1.0
                        sigma = 0.02 * std * intensity
                        noise = torch.randn_like(param) * sigma
                        param.add_(noise * mask)

                # Reset mutation (rare, larger jumps)
                if p_reset > 0.0:
                    mask = torch.rand_like(param) < p_reset
                    if mask.any():
                        new_vals = torch.randn_like(param)
                        param.data[mask] = new_vals[mask]
        return model

    # --- Main GA loop ---
    scores_cpu = scores_tensor.detach().cpu()
    sorted_indices = torch.argsort(scores_tensor, descending=True)
    num_elite = int(max(1, round(POPULATION_SIZE * ELITISM_RATE)))

    new_population = []
    # Elitism: clone top performers to preserve them
    for i in sorted_indices[:num_elite]:
        new_population.append(clone_model(current_population[int(i.item())]))

    # Children
    num_children_to_create = POPULATION_SIZE - num_elite - num_immigrants
    num_children_to_create = max(0, num_children_to_create)

    for _ in range(num_children_to_create):
        # Select parents with adaptive tournament size and encourage different parents
        parent1 = tournament_selection(current_population, scores_cpu, TOURNAMENT_SIZE)
        parent2 = tournament_selection(current_population, scores_cpu, TOURNAMENT_SIZE)
        attempts = 0
        while parent2 is parent1 and attempts < 3:
            parent2 = tournament_selection(current_population, scores_cpu, TOURNAMENT_SIZE)
            attempts += 1

        child = crossover(parent1, parent2)
        child = mutate(child, P_GAUSS, P_RESET, P_REINIT_TENSOR, MUT_INTENSITY)
        new_population.append(child)

    # Random immigrants (fresh individuals) to boost exploration when needed
    for _ in range(num_immigrants):
        immigrant = SimpleNet().to(device)
        # Optionally small mutation to random init to diversify slightly
        immigrant = mutate(immigrant, P_GAUSS * 0.5, P_RESET * 0.5, P_REINIT_TENSOR * 0.5, MUT_INTENSITY * 0.5)
        new_population.append(immigrant)

    # If rounding led to size mismatch, adjust by cloning best or trimming
    if len(new_population) < POPULATION_SIZE:
        deficit = POPULATION_SIZE - len(new_population)
        for i in sorted_indices[:deficit]:
            new_population.append(clone_model(current_population[int(i.item())]))
    elif len(new_population) > POPULATION_SIZE:
        new_population = new_population[:POPULATION_SIZE]

    return new_population
```

Starting 3 detailed evaluation runs for this operator...
  > Run 1/3...
    Run 1 finished. Final test accuracy of best model: 16.18%
  > Run 2/3...
    Run 2 finished. Final test accuracy of best model: 12.80%
  > Run 3/3...
    Run 3 finished. Final test accuracy of best model: 17.86%
Finished evaluation. Operator 6 primary fitness: -11.3729

--- Meta-Generation 2 Results ---
Best Operator Fitness (Avg Final Best): -11.3403
Best Performing Operator's Code:


```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Holistic evolutionary operator with adaptive diversity control.
    - Elitism with cloning
    - Adaptive tournament size
    - Dynamic crossover strategy (Uniform / BLX-alpha)
    - Adaptive Gaussian and reset mutation
    - Immigrants injection when diversity is low
    """
    POP_SIZE = len(current_population)

    # --- Prepare fitness tensor ---
    scores = fitness_scores
    if not torch.is_tensor(scores):
        scores = torch.tensor(scores, dtype=torch.float32, device=device)
    else:
        scores = scores.to(device=device, dtype=torch.float32)

    # Guard for degenerate populations
    if POP_SIZE == 0:
        return []

    # --- Diversity diagnostics (coefficient of variation) ---
    mean = torch.mean(scores)
    std = torch.std(scores)
    cv = std / (torch.abs(mean) + 1e-8)

    low_diversity = cv.item() < 0.05
    unstable = cv.item() > 0.5

    # --- Adaptive hyperparameters ---
    if low_diversity:
        ELITISM_RATE = 0.08
        TOURNAMENT_SIZE = 3
        GAUSS_MUT_PROB = 0.02
        GAUSS_SIGMA = 0.10
        RESET_MUT_PROB = 5e-4
        IMMIGRANTS_RATE = 0.05
        CROSSOVER_MODE = "blx"  # BLX-alpha crossover to expand search
        BLX_ALPHA = 0.20
    elif unstable:
        ELITISM_RATE = 0.07
        TOURNAMENT_SIZE = 7
        GAUSS_MUT_PROB = 0.006
        GAUSS_SIGMA = 0.03
        RESET_MUT_PROB = 1e-5
        IMMIGRANTS_RATE = 0.01
        CROSSOVER_MODE = "uniform"  # stabilize around good schemas
        BLX_ALPHA = 0.10
    else:
        ELITISM_RATE = 0.10
        TOURNAMENT_SIZE = 5
        GAUSS_MUT_PROB = 0.01
        GAUSS_SIGMA = 0.05
        RESET_MUT_PROB = 1e-4
        IMMIGRANTS_RATE = 0.02
        CROSSOVER_MODE = "mixed"  # combine uniform and arithmetic

    num_elite = max(1, int(POP_SIZE * ELITISM_RATE))
    num_immigrants = max(0, int(POP_SIZE * IMMIGRANTS_RATE))
    num_children = POP_SIZE - num_elite - num_immigrants

    # --- Utilities ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def blx_alpha_tensor(p1, p2, alpha):
        low = torch.minimum(p1, p2)
        high = torch.maximum(p1, p2)
        span = high - low
        minv = low - alpha * span
        maxv = high + alpha * span
        return minv + (maxv - minv) * torch.rand_like(p1)

    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        p1 = parent1.state_dict()
        p2 = parent2.state_dict()
        c = child.state_dict()

        with torch.no_grad():
            for k in c.keys():
                t1 = p1[k]
                t2 = p2[k]
                if CROSSOVER_MODE == "uniform":
                    mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                    c[k] = t1 * mask + t2 * (1.0 - mask)
                elif CROSSOVER_MODE == "blx":
                    c[k] = blx_alpha_tensor(t1, t2, BLX_ALPHA)
                else:  # mixed
                    # 50% uniform mask, 50% arithmetic blend with per-element alpha in [0.25, 0.75]
                    if torch.rand((), device=device) < 0.5:
                        mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                        c[k] = t1 * mask + t2 * (1.0 - mask)
                    else:
                        alpha = 0.25 + 0.5 * torch.rand_like(t1)
                        c[k] = alpha * t1 + (1.0 - alpha) * t2
        child.load_state_dict(c)
        return child

    def mutate(model, gauss_prob=GAUSS_MUT_PROB, gauss_sigma=GAUSS_SIGMA, reset_prob=RESET_MUT_PROB):
        with torch.no_grad():
            for p in model.parameters():
                if gauss_prob > 0.0:
                    gmask = torch.rand_like(p) < gauss_prob
                    noise = torch.randn_like(p) * gauss_sigma
                    p[gmask] = p[gmask] + noise[gmask]
                if reset_prob > 0.0:
                    rmask = torch.rand_like(p) < reset_prob
                    new_vals = torch.randn_like(p)
                    p[rmask] = new_vals[rmask]
        return model

    def tournament_selection(population, scores_tensor, size=TOURNAMENT_SIZE):
        idxs = torch.randint(0, len(population), (size,), device=device)
        sub_scores = scores_tensor[idxs]
        winner_idx = int(idxs[torch.argmax(sub_scores)].item())
        return population[winner_idx]

    # --- Build next population ---
    sorted_idx = torch.argsort(scores, descending=True)
    elites = []
    for i in range(num_elite):
        elites.append(clone_model(current_population[int(sorted_idx[i].item())]))

    new_population = list(elites)

    # Immigrants (fresh random individuals) for diversity
    for _ in range(num_immigrants):
        new_population.append(SimpleNet().to(device))

    # Children via selection, crossover, mutation
    for _ in range(num_children):
        p1 = tournament_selection(current_population, scores)
        # ensure different parents where possible
        p2 = p1
        attempts = 0
        while p2 is p1 and attempts < 3:
            p2 = tournament_selection(current_population, scores)
            attempts += 1

        child = crossover(p1, p2)
        child = mutate(child)
        new_population.append(child)

    # Safety: if rounding errors occur, adjust size
    if len(new_population) > POP_SIZE:
        new_population = new_population[:POP_SIZE]
    elif len(new_population) < POP_SIZE:
        # Fill up with additional mutated clones of elites
        fill_needed = POP_SIZE - len(new_population)
        for i in range(fill_needed):
            extra = clone_model(current_population[int(sorted_idx[i % len(sorted_idx)].item())])
            new_population.append(mutate(extra))

    return new_population
```

Best Operator's Performance Summary:


This operator's performance has been recorded over **3** separate runs.
The table below shows the fitness dynamics, **averaged across all runs**.
Fitness is based on the negative loss on training batches (higher is better).

| Gen | Best Fitness | Avg Fitness  | Worst Fitness | Spread (Diversity) |
|:---:|:------------:|:------------:|:-------------:|:------------------:|
|  0  |     -11.4630 |     -11.5397 |      -11.6121 |             0.1491 |
|  1  |     -11.4313 |     -11.5767 |      -12.1839 |             0.7526 |
|  2  |     -11.4298 |     -11.5916 |      -11.9507 |             0.5209 |
|  3  |     -11.3812 |     -11.6047 |      -11.9569 |             0.5757 |
|  4  |     -11.3890 |     -11.6302 |      -12.2664 |             0.8774 |
|  5  |     -11.3751 |     -11.6719 |      -13.6228 |             2.2477 |
|  6  |     -11.3837 |     -11.6159 |      -12.1176 |             0.7339 |
|  7  |     -11.3677 |     -11.6666 |      -12.3902 |             1.0226 |
|  8  |     -11.3426 |     -11.6645 |      -12.4404 |             1.0978 |
|  9  |     -11.3403 |     -11.6956 |      -13.2220 |             1.8817 |

**Analysis Hints for Your Evolution:**
- **Rate of Improvement:** Analyze the slope of the `Best Fitness` column. A steep, consistent increase is ideal.
- **Population Diversity:** The `Spread (Best-Worst)` column is a proxy for diversity. If it collapses to near-zero too quickly, the population has prematurely converged, and you should consider changes that increase exploration (e.g., higher mutation, different selection).
- **Stability:** Smooth, predictable improvements indicate a stable operator. Jagged or erratic values might suggest the operator is too chaotic.

--- Calling Openai API to evolve operator (with generational feedback) ---
--- Calling Google API to evolve operator (with generational feedback) ---
--- Calling Google API to evolve operator (with generational feedback) ---

Evolving the `generate_next_population` operator...

--- Evaluating Operator Individual 4/6 ---


```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Holistic evolutionary operator with stability-focused adaptation.
    - Rank-based adaptive mutation intensity
    - Tournament selection with regime-aware pressure
    - Simulated Binary Crossover (SBX) for controlled exploration
    - Per-parameter scale-aware Gaussian mutation
    - Guided immigrants only when diversity is low
    """
    POP_SIZE = len(current_population)
    if POP_SIZE == 0:
        return []

    # Prepare fitness tensor
    scores = fitness_scores
    if not torch.is_tensor(scores):
        scores = torch.tensor(scores, dtype=torch.float32, device=device)
    else:
        scores = scores.to(device=device, dtype=torch.float32)

    # Diversity diagnostics
    mean = torch.mean(scores)
    std = torch.std(scores)
    cv = (std / (torch.abs(mean) + 1e-8)).clamp(min=0.0).item()

    # Regimes
    low_diversity = cv < 0.06
    unstable = cv > 0.35

    # Adaptive hyperparameters
    if low_diversity:
        ELITISM_RATE = 0.08
        TOURNAMENT_SIZE = 3
        BASE_MUT_PROB = 0.02
        BASE_MUT_SIGMA = 0.12
        RESET_MUT_PROB = 5e-4
        IMMIGRANTS_RATE = 0.06
        SBX_ETA = 5.0  # more exploratory
    elif unstable:
        ELITISM_RATE = 0.10
        TOURNAMENT_SIZE = 6
        BASE_MUT_PROB = 0.004
        BASE_MUT_SIGMA = 0.02
        RESET_MUT_PROB = 0.0
        IMMIGRANTS_RATE = 0.0
        SBX_ETA = 15.0  # conservative crossover
    else:
        ELITISM_RATE = 0.10
        TOURNAMENT_SIZE = 4
        BASE_MUT_PROB = 0.01
        BASE_MUT_SIGMA = 0.05
        RESET_MUT_PROB = 1e-4
        IMMIGRANTS_RATE = 0.01
        SBX_ETA = 10.0

    num_elite = max(1, int(POP_SIZE * ELITISM_RATE))
    num_immigrants = max(0, int(POP_SIZE * IMMIGRANTS_RATE))
    num_children = POP_SIZE - num_elite - num_immigrants

    # Rank-based intensity shaping: rank 0 (best) -> 0.0, worst -> 1.0
    sorted_idx = torch.argsort(scores, descending=True)
    ranks = torch.empty_like(sorted_idx, dtype=torch.float32)
    if POP_SIZE > 1:
        ranks[sorted_idx] = torch.linspace(0.0, 1.0, steps=POP_SIZE, device=device)
    else:
        ranks[sorted_idx] = 0.0

    # Utilities
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def sbx_tensor(p1, p2, eta):
        # Simulated Binary Crossover for tensors
        u = torch.rand_like(p1)
        one = torch.ones_like(u)
        two = one * 2.0
        inv = 1.0 / (eta + 1.0)
        beta = torch.where(
            u <= 0.5,
            (two * u) ** inv,
            (one / (two * (1.0 - u))) ** inv,
        )
        c1 = 0.5 * ((1 + beta) * p1 + (1 - beta) * p2)
        c2 = 0.5 * ((1 - beta) * p1 + (1 + beta) * p2)
        # pick per-element between c1 and c2 to avoid bias
        pick_mask = (torch.rand_like(u) < 0.5)
        return torch.where(pick_mask, c1, c2)

    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        sd1 = parent1.state_dict()
        sd2 = parent2.state_dict()
        c = child.state_dict()
        with torch.no_grad():
            for k in c.keys():
                t1 = sd1[k]
                t2 = sd2[k]
                # Apply SBX; for very small tensors (e.g., biases), fallback to simple average with jitter
                if t1.numel() >= 4:
                    c[k] = sbx_tensor(t1, t2, SBX_ETA)
                else:
                    alpha = 0.5
                    blended = alpha * t1 + (1.0 - alpha) * t2
                    jitter = 0.01 * torch.randn_like(blended)
                    c[k] = blended + jitter
        child.load_state_dict(c)
        return child

    def mutate(model, base_prob, base_sigma, reset_prob, intensity_scale=1.0):
        # intensity_scale adjusts both probability and sigma per individual
        prob = float(torch.clamp(torch.tensor(base_prob * intensity_scale, device=device), 1e-6, 0.5).item())
        sigma_scale = float(torch.clamp(torch.tensor(intensity_scale, device=device), 0.25, 2.0).item())
        with torch.no_grad():
            for p in model.parameters():
                # Per-parameter scale-aware sigma
                p_std = p.detach().std()
                # fallback scale if near-constant tensor
                eff_std = p_std if torch.isfinite(p_std) and p_std > 1e-6 else torch.tensor(1.0, device=device)
                eff_sigma = base_sigma * sigma_scale * eff_std
                if prob > 0.0:
                    gmask = (torch.rand_like(p) < prob)
                    if torch.any(gmask):
                        noise = torch.randn_like(p) * eff_sigma
                        p[gmask] = p[gmask] + noise[gmask]
                if reset_prob > 0.0:
                    rprob = float(torch.clamp(torch.tensor(reset_prob * intensity_scale, device=device), 0.0, 0.05).item())
                    if rprob > 0.0:
                        rmask = (torch.rand_like(p) < rprob)
                        if torch.any(rmask):
                            # reset around 0 with variance relative to eff_std to avoid extreme outliers
                            new_vals = torch.randn_like(p) * (eff_std * 2.0)
                            p[rmask] = new_vals[rmask]
        return model

    def tournament_selection_index(pop, scores_tensor, size):
        idxs = torch.randint(0, len(pop), (size,), device=device)
        sub_scores = scores_tensor[idxs]
        winner_idx = int(idxs[torch.argmax(sub_scores)].item())
        return winner_idx

    # Build elites
    elites = []
    for i in range(num_elite):
        elites.append(clone_model(current_population[int(sorted_idx[i].item())]))

    new_population = list(elites)

    # Guided immigrants for diversity (only when configured)
    def guided_immigrant():
        # Mix: half fresh random, half heavily mutated clone of a random parent
        if torch.rand((), device=device) < 0.5:
            return SimpleNet().to(device)
        else:
            ridx = int(torch.randint(0, POP_SIZE, (1,), device=device).item())
            m = clone_model(current_population[ridx])
            return mutate(m, base_prob=max(BASE_MUT_PROB, 0.015), base_sigma=max(BASE_MUT_SIGMA, 0.10), reset_prob=max(RESET_MUT_PROB, 5e-4), intensity_scale=2.0)

    for _ in range(num_immigrants):
        new_population.append(guided_immigrant())

    # Children via selection, crossover, mutation
    for _ in range(num_children):
        p1_idx = tournament_selection_index(current_population, scores, TOURNAMENT_SIZE)
        p2_idx = p1_idx
        attempts = 0
        while p2_idx == p1_idx and attempts < 4:
            p2_idx = tournament_selection_index(current_population, scores, TOURNAMENT_SIZE)
            attempts += 1

        p1 = current_population[p1_idx]
        p2 = current_population[p2_idx]

        # Parent-quality-based mutation intensity (0 best -> 0.7, worst -> 1.3)
        r1 = ranks[p1_idx].item()
        r2 = ranks[p2_idx].item()
        rmean = (r1 + r2) * 0.5
        intensity = 0.7 + 0.6 * rmean

        child = crossover(p1, p2)
        child = mutate(child, base_prob=BASE_MUT_PROB, base_sigma=BASE_MUT_SIGMA, reset_prob=RESET_MUT_PROB, intensity_scale=float(intensity))
        new_population.append(child)

    # Adjust to exact size
    if len(new_population) > POP_SIZE:
        new_population = new_population[:POP_SIZE]
    elif len(new_population) < POP_SIZE:
        fill_needed = POP_SIZE - len(new_population)
        for i in range(fill_needed):
            elite_src = current_population[int(sorted_idx[i % len(sorted_idx)].item())]
            extra = clone_model(elite_src)
            new_population.append(mutate(extra, base_prob=BASE_MUT_PROB, base_sigma=BASE_MUT_SIGMA, reset_prob=RESET_MUT_PROB, intensity_scale=1.0))

    return new_population
```

Starting 3 detailed evaluation runs for this operator...
  > Run 1/3...
    Run 1 finished. Final test accuracy of best model: 10.38%
  > Run 2/3...
    Run 2 finished. Final test accuracy of best model: 17.03%
  > Run 3/3...
    Run 3 finished. Final test accuracy of best model: 16.77%
Finished evaluation. Operator 4 primary fitness: -11.3768

--- Evaluating Operator Individual 5/6 ---


```python
```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Evolved holistic evolutionary operator.
    - Elitism to preserve best solutions.
    - Linear Rank Selection for stable selection pressure.
    - Hybrid crossover (Uniform/Arithmetic) for balanced gene exchange.
    - Multi-stage mutation (Creep, Gaussian, Reset) for fine-tuning and exploration.
    - Adaptive hyperparameters based on population diversity (CV).
    - Immigrant injection to prevent long-term stagnation.
    """
    POP_SIZE = len(current_population)
    if POP_SIZE == 0:
        return []

    # --- Prepare fitness tensor ---
    scores = fitness_scores
    if not torch.is_tensor(scores):
        scores = torch.tensor(scores, dtype=torch.float32, device=device)
    else:
        scores = scores.to(device=device, dtype=torch.float32)

    # --- Diversity diagnostics (coefficient of variation) ---
    mean = torch.mean(scores)
    std = torch.std(scores)
    cv = std / (torch.abs(mean) + 1e-8)

    # --- Adaptive hyperparameters based on diversity ---
    if cv.item() < 0.03:  # Low diversity: Increase exploration
        ELITISM_RATE = 0.05
        SELECTION_PRESSURE = 1.3  # Closer to uniform random
        CREEP_PROB, CREEP_SIGMA = 0.02, 0.03
        GAUSS_PROB, GAUSS_SIGMA = 0.01, 0.10
        RESET_PROB = 5e-4
        IMMIGRANTS_RATE = 0.10
    elif cv.item() > 0.3:  # High diversity: Increase exploitation
        ELITISM_RATE = 0.15
        SELECTION_PRESSURE = 1.8  # Stronger selection
        CREEP_PROB, CREEP_SIGMA = 0.05, 0.01
        GAUSS_PROB, GAUSS_SIGMA = 0.005, 0.05
        RESET_PROB = 1e-5
        IMMIGRANTS_RATE = 0.01
    else:  # Balanced state
        ELITISM_RATE = 0.10
        SELECTION_PRESSURE = 1.6
        CREEP_PROB, CREEP_SIGMA = 0.04, 0.02
        GAUSS_PROB, GAUSS_SIGMA = 0.008, 0.08
        RESET_PROB = 1e-4
        IMMIGRANTS_RATE = 0.03

    num_elite = max(1, int(POP_SIZE * ELITISM_RATE))
    num_immigrants = max(0, int(POP_SIZE * IMMIGRANTS_RATE))
    num_children = POP_SIZE - num_elite - num_immigrants

    # --- Utilities ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        p1_sd = parent1.state_dict()
        p2_sd = parent2.state_dict()
        c_sd = child.state_dict()

        with torch.no_grad():
            for k in c_sd.keys():
                t1, t2 = p1_sd[k], p2_sd[k]
                # Hybrid: 50% chance of uniform crossover, 50% arithmetic per tensor
                if torch.rand(()) < 0.5:
                    mask = torch.rand_like(t1) < 0.5
                    c_sd[k].copy_(torch.where(mask, t1, t2))
                else:
                    alpha = torch.rand((), device=device)
                    c_sd[k].copy_(alpha * t1 + (1.0 - alpha) * t2)
        child.load_state_dict(c_sd)
        return child

    def mutate(model):
        with torch.no_grad():
            for p in model.parameters():
                # 1. Creep mutation (fine-tuning)
                if CREEP_PROB > 0:
                    mask = torch.rand_like(p) < CREEP_PROB
                    noise = torch.randn_like(p) * CREEP_SIGMA
                    p.add_(noise * mask)
                # 2. Gaussian mutation (larger jumps)
                if GAUSS_PROB > 0:
                    mask = torch.rand_like(p) < GAUSS_PROB
                    noise = torch.randn_like(p) * GAUSS_SIGMA
                    p.add_(noise * mask)
                # 3. Reset mutation (radical exploration)
                if RESET_PROB > 0:
                    mask = torch.rand_like(p) < RESET_PROB
                    new_vals = torch.randn_like(p) * p.std() # scale to layer's current variance
                    p[mask] = new_vals[mask]
        return model

    # --- Build next population ---
    sorted_idx = torch.argsort(scores, descending=True)
    
    # 1. Elitism
    elites = [clone_model(current_population[i.item()]) for i in sorted_idx[:num_elite]]
    new_population = list(elites)

    # 2. Immigrants (fresh random individuals)
    for _ in range(num_immigrants):
        new_population.append(SimpleNet().to(device))
    
    # --- Setup for Rank Selection ---
    s = SELECTION_PRESSURE
    ranks_zero_based = torch.arange(POP_SIZE, dtype=torch.float32, device=device)
    # Linear ranking probability calculation
    probs = (s - (2 * s - 2) * ranks_zero_based / (POP_SIZE - 1)) / POP_SIZE
    if not torch.all(probs >= 0): # Safety check for floating point issues
        probs = torch.ones_like(probs) / POP_SIZE

    # 3. Children via selection, crossover, mutation
    for _ in range(num_children):
        # Select parents using rank-based probabilities
        parent_indices = torch.multinomial(probs, num_samples=2, replacement=True)
        p1_orig_idx = sorted_idx[parent_indices[0]].item()
        p2_orig_idx = sorted_idx[parent_indices[1]].item()
        
        p1 = current_population[p1_orig_idx]
        p2 = current_population[p2_orig_idx]
        
        child = crossover(p1, p2)
        child = mutate(child)
        new_population.append(child)

    # Safety: Adjust size if rounding errors occur
    while len(new_population) < POP_SIZE:
        # Fill with mutated clones of the best individuals
        extra_child = clone_model(elites[len(new_population) % len(elites)])
        new_population.append(mutate(extra_child))
    
    return new_population[:POP_SIZE]
```
```

Error executing evolved code. Attempting LLM repair (1/1).
--- Calling Google API to repair operator ---
Error executing evolved code after repair attempts: invalid syntax (<string>, line 1)
Finished evaluation. Operator 5 failed and was assigned a fitness of -inf.

--- Evaluating Operator Individual 6/6 ---


```python
```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Evolved GA operator. Increases selection pressure and uses uniform crossover
    to enhance exploitation, while maintaining adaptive mutation for exploration.
    Aims for faster convergence based on the slow but steady progress observed.
    """
    # --- Tunable Parameters ---
    POPULATION_SIZE = len(current_population)
    # Increased elitism to preserve more top performers and accelerate exploitation.
    ELITISM_RATE = 0.10

    # --- Adaptive Parameter Calculation ---
    scores_tensor = fitness_scores.detach()
    spread = (scores_tensor.max() - scores_tensor.min()).item() if len(scores_tensor) > 0 else 0.0
    # Diversity factor in [0,1]: higher when spread is low.
    # The threshold is slightly lowered to be more responsive.
    f = max(0.0, min(1.0, (0.12 - spread) / 0.12))

    # Increased base selection pressure to favor fitter individuals more strongly.
    # Pressure reduces as diversity drops to allow for recovery.
    TOURNAMENT_SIZE = max(3, int(round(6 - 3 * f)))

    # Simplified and strengthened adaptive mutation scheme.
    # Higher base mutation rate for more consistent exploration.
    P_MUTATION = 0.004 + 0.01 * f
    # Given a mutation, a small chance it's a large "reset" jump.
    P_RESET_GIVEN_MUTATION = 0.05
    # Adaptive mutation intensity for Gaussian noise.
    MUT_INTENSITY = 0.5 + 1.5 * f

    # Random immigrants remain a key mechanism to inject diversity when stagnating.
    num_immigrants = int(POPULATION_SIZE * (0.05 * f))

    # --- Helper Functions ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def tournament_selection(population, scores_cpu, k):
        # Standard tournament selection: select k random individuals and return the best one.
        idx = torch.randint(0, len(population), (k,))
        winner_local_idx = torch.argmax(scores_cpu[idx])
        return population[int(idx[winner_local_idx].item())]

    def uniform_crossover(parent1, parent2):
        """
        Performs uniform crossover. For each weight, a random choice is made
        to take the weight from either parent1 or parent2. This preserves learned
        "building blocks" (groups of co-adapted weights) better than averaging.
        """
        child = SimpleNet().to(device)
        p1_sd = parent1.state_dict()
        p2_sd = parent2.state_dict()
        child_sd = child.state_dict()

        for key in child_sd.keys():
            t1 = p1_sd[key]
            t2 = p2_sd[key]
            if t1.dtype.is_floating_point:
                # Create a binary mask to select genes from parents.
                mask = torch.rand_like(t1) < 0.5
                child_sd[key] = torch.where(mask, t1, t2)
            else:
                # For non-floating point params (e.g., batchnorm stats), just copy from parent1.
                child_sd[key] = t1.clone()

        child.load_state_dict(child_sd)
        return child

    def mutate(model):
        """
        Applies mutation based on a single probability P_MUTATION.
        Mutated weights are either perturbed by scaled Gaussian noise or reset completely.
        """
        with torch.no_grad():
            for param in model.parameters():
                if not param.requires_grad:
                    continue

                # Create a single mask for all mutations on this tensor.
                mutation_mask = torch.rand_like(param) < P_MUTATION
                if not mutation_mask.any():
                    continue

                # Decide which of the selected weights get reset vs. perturbed.
                reset_mask = (torch.rand_like(param) < P_RESET_GIVEN_MUTATION) & mutation_mask

                # Apply Gaussian perturbation (on non-reset weights).
                gauss_mask = mutation_mask & ~reset_mask
                if gauss_mask.any():
                    std = param.std().item()
                    if not (std > 0.0): std = 1.0 # Avoid division by zero
                    sigma = 0.1 * std * MUT_INTENSITY # Noise scaled by weight std and adaptive intensity
                    noise = torch.randn_like(param) * sigma
                    param.add_(noise * gauss_mask)

                # Apply reset mutation (large jump).
                if reset_mask.any():
                    new_vals = torch.randn_like(param)
                    param.data[reset_mask] = new_vals[reset_mask]
        return model

    # --- Main GA Execution ---
    scores_cpu = scores_tensor.detach().cpu()
    sorted_indices = torch.argsort(scores_tensor, descending=True)
    num_elite = int(max(1, round(POPULATION_SIZE * ELITISM_RATE)))

    new_population = []
    # 1. Elitism: Directly copy the best individuals to the next generation.
    for i in sorted_indices[:num_elite]:
        new_population.append(clone_model(current_population[int(i.item())]))

    # 2. Crossover & Mutation: Create the bulk of the new generation.
    num_children_to_create = POPULATION_SIZE - num_elite - num_immigrants
    num_children_to_create = max(0, num_children_to_create)

    for _ in range(num_children_to_create):
        # Select parents using the stronger tournament selection.
        parent1 = tournament_selection(current_population, scores_cpu, TOURNAMENT_SIZE)
        parent2 = tournament_selection(current_population, scores_cpu, TOURNAMENT_SIZE)
        attempts = 0
        while parent2 is parent1 and attempts < 5: # Encourage different parents
            parent2 = tournament_selection(current_population, scores_cpu, TOURNAMENT_SIZE)
            attempts += 1

        child = uniform_crossover(parent1, parent2)
        child = mutate(child)
        new_population.append(child)

    # 3. Immigration: Add fresh, random individuals if diversity is low.
    for _ in range(num_immigrants):
        immigrant = SimpleNet().to(device)
        new_population.append(immigrant)

    # 4. Fill to size: Ensure population size is constant.
    while len(new_population) < POPULATION_SIZE:
        # If there's a deficit due to rounding, fill with clones of the absolute best.
        best_clone = clone_model(current_population[int(sorted_indices[0].item())])
        new_population.append(best_clone)
    if len(new_population) > POPULATION_SIZE:
        new_population = new_population[:POPULATION_SIZE]

    return new_population
```
```

Error executing evolved code. Attempting LLM repair (1/1).
--- Calling Openai API to repair operator ---
Error executing evolved code after repair attempts: invalid syntax (<string>, line 1)
Finished evaluation. Operator 6 failed and was assigned a fitness of -inf.

--- Meta-Generation 3 Results ---
Best Operator Fitness (Avg Final Best): -11.3403
Best Performing Operator's Code:


```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Holistic evolutionary operator with adaptive diversity control.
    - Elitism with cloning
    - Adaptive tournament size
    - Dynamic crossover strategy (Uniform / BLX-alpha)
    - Adaptive Gaussian and reset mutation
    - Immigrants injection when diversity is low
    """
    POP_SIZE = len(current_population)

    # --- Prepare fitness tensor ---
    scores = fitness_scores
    if not torch.is_tensor(scores):
        scores = torch.tensor(scores, dtype=torch.float32, device=device)
    else:
        scores = scores.to(device=device, dtype=torch.float32)

    # Guard for degenerate populations
    if POP_SIZE == 0:
        return []

    # --- Diversity diagnostics (coefficient of variation) ---
    mean = torch.mean(scores)
    std = torch.std(scores)
    cv = std / (torch.abs(mean) + 1e-8)

    low_diversity = cv.item() < 0.05
    unstable = cv.item() > 0.5

    # --- Adaptive hyperparameters ---
    if low_diversity:
        ELITISM_RATE = 0.08
        TOURNAMENT_SIZE = 3
        GAUSS_MUT_PROB = 0.02
        GAUSS_SIGMA = 0.10
        RESET_MUT_PROB = 5e-4
        IMMIGRANTS_RATE = 0.05
        CROSSOVER_MODE = "blx"  # BLX-alpha crossover to expand search
        BLX_ALPHA = 0.20
    elif unstable:
        ELITISM_RATE = 0.07
        TOURNAMENT_SIZE = 7
        GAUSS_MUT_PROB = 0.006
        GAUSS_SIGMA = 0.03
        RESET_MUT_PROB = 1e-5
        IMMIGRANTS_RATE = 0.01
        CROSSOVER_MODE = "uniform"  # stabilize around good schemas
        BLX_ALPHA = 0.10
    else:
        ELITISM_RATE = 0.10
        TOURNAMENT_SIZE = 5
        GAUSS_MUT_PROB = 0.01
        GAUSS_SIGMA = 0.05
        RESET_MUT_PROB = 1e-4
        IMMIGRANTS_RATE = 0.02
        CROSSOVER_MODE = "mixed"  # combine uniform and arithmetic

    num_elite = max(1, int(POP_SIZE * ELITISM_RATE))
    num_immigrants = max(0, int(POP_SIZE * IMMIGRANTS_RATE))
    num_children = POP_SIZE - num_elite - num_immigrants

    # --- Utilities ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def blx_alpha_tensor(p1, p2, alpha):
        low = torch.minimum(p1, p2)
        high = torch.maximum(p1, p2)
        span = high - low
        minv = low - alpha * span
        maxv = high + alpha * span
        return minv + (maxv - minv) * torch.rand_like(p1)

    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        p1 = parent1.state_dict()
        p2 = parent2.state_dict()
        c = child.state_dict()

        with torch.no_grad():
            for k in c.keys():
                t1 = p1[k]
                t2 = p2[k]
                if CROSSOVER_MODE == "uniform":
                    mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                    c[k] = t1 * mask + t2 * (1.0 - mask)
                elif CROSSOVER_MODE == "blx":
                    c[k] = blx_alpha_tensor(t1, t2, BLX_ALPHA)
                else:  # mixed
                    # 50% uniform mask, 50% arithmetic blend with per-element alpha in [0.25, 0.75]
                    if torch.rand((), device=device) < 0.5:
                        mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                        c[k] = t1 * mask + t2 * (1.0 - mask)
                    else:
                        alpha = 0.25 + 0.5 * torch.rand_like(t1)
                        c[k] = alpha * t1 + (1.0 - alpha) * t2
        child.load_state_dict(c)
        return child

    def mutate(model, gauss_prob=GAUSS_MUT_PROB, gauss_sigma=GAUSS_SIGMA, reset_prob=RESET_MUT_PROB):
        with torch.no_grad():
            for p in model.parameters():
                if gauss_prob > 0.0:
                    gmask = torch.rand_like(p) < gauss_prob
                    noise = torch.randn_like(p) * gauss_sigma
                    p[gmask] = p[gmask] + noise[gmask]
                if reset_prob > 0.0:
                    rmask = torch.rand_like(p) < reset_prob
                    new_vals = torch.randn_like(p)
                    p[rmask] = new_vals[rmask]
        return model

    def tournament_selection(population, scores_tensor, size=TOURNAMENT_SIZE):
        idxs = torch.randint(0, len(population), (size,), device=device)
        sub_scores = scores_tensor[idxs]
        winner_idx = int(idxs[torch.argmax(sub_scores)].item())
        return population[winner_idx]

    # --- Build next population ---
    sorted_idx = torch.argsort(scores, descending=True)
    elites = []
    for i in range(num_elite):
        elites.append(clone_model(current_population[int(sorted_idx[i].item())]))

    new_population = list(elites)

    # Immigrants (fresh random individuals) for diversity
    for _ in range(num_immigrants):
        new_population.append(SimpleNet().to(device))

    # Children via selection, crossover, mutation
    for _ in range(num_children):
        p1 = tournament_selection(current_population, scores)
        # ensure different parents where possible
        p2 = p1
        attempts = 0
        while p2 is p1 and attempts < 3:
            p2 = tournament_selection(current_population, scores)
            attempts += 1

        child = crossover(p1, p2)
        child = mutate(child)
        new_population.append(child)

    # Safety: if rounding errors occur, adjust size
    if len(new_population) > POP_SIZE:
        new_population = new_population[:POP_SIZE]
    elif len(new_population) < POP_SIZE:
        # Fill up with additional mutated clones of elites
        fill_needed = POP_SIZE - len(new_population)
        for i in range(fill_needed):
            extra = clone_model(current_population[int(sorted_idx[i % len(sorted_idx)].item())])
            new_population.append(mutate(extra))

    return new_population
```

Best Operator's Performance Summary:


This operator's performance has been recorded over **3** separate runs.
The table below shows the fitness dynamics, **averaged across all runs**.
Fitness is based on the negative loss on training batches (higher is better).

| Gen | Best Fitness | Avg Fitness  | Worst Fitness | Spread (Diversity) |
|:---:|:------------:|:------------:|:-------------:|:------------------:|
|  0  |     -11.4630 |     -11.5397 |      -11.6121 |             0.1491 |
|  1  |     -11.4313 |     -11.5767 |      -12.1839 |             0.7526 |
|  2  |     -11.4298 |     -11.5916 |      -11.9507 |             0.5209 |
|  3  |     -11.3812 |     -11.6047 |      -11.9569 |             0.5757 |
|  4  |     -11.3890 |     -11.6302 |      -12.2664 |             0.8774 |
|  5  |     -11.3751 |     -11.6719 |      -13.6228 |             2.2477 |
|  6  |     -11.3837 |     -11.6159 |      -12.1176 |             0.7339 |
|  7  |     -11.3677 |     -11.6666 |      -12.3902 |             1.0226 |
|  8  |     -11.3426 |     -11.6645 |      -12.4404 |             1.0978 |
|  9  |     -11.3403 |     -11.6956 |      -13.2220 |             1.8817 |

**Analysis Hints for Your Evolution:**
- **Rate of Improvement:** Analyze the slope of the `Best Fitness` column. A steep, consistent increase is ideal.
- **Population Diversity:** The `Spread (Best-Worst)` column is a proxy for diversity. If it collapses to near-zero too quickly, the population has prematurely converged, and you should consider changes that increase exploration (e.g., higher mutation, different selection).
- **Stability:** Smooth, predictable improvements indicate a stable operator. Jagged or erratic values might suggest the operator is too chaotic.

--- Calling Openai API to evolve operator (with generational feedback) ---

Evolving the `generate_next_population` operator...

--- Evaluating Operator Individual 6/6 ---


```python
```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Holistic evolutionary operator with stability-aware exploitation and adaptive exploration.
    - Rank/softmax selection with adaptive temperature
    - Parent-biased blend crossover with optional BLX-alpha mixing
    - Scale-aware Gaussian mutation with per-offspring intensity
    - Elitism and elite-neighborhood exploitation
    - Adaptive immigrants injection based on diversity/instability
    """
    POP_SIZE = len(current_population)
    if POP_SIZE == 0:
        return []

    # --- Prepare fitness tensor ---
    scores = fitness_scores
    if not torch.is_tensor(scores):
        scores = torch.tensor(scores, dtype=torch.float32, device=device)
    else:
        scores = scores.to(device=device, dtype=torch.float32)

    # --- Statistics & diagnostics ---
    mean = torch.mean(scores)
    std = torch.std(scores)
    best = torch.max(scores)
    worst = torch.min(scores)
    spread = (best - worst).abs()
    denom = torch.clamp(torch.abs(mean), min=1e-6)
    rel_std = std / denom
    rel_spread = spread / denom

    # Heuristics from observed dynamics: sometimes large spread causes poor average
    low_diversity = (rel_spread.item() < 0.06) or (rel_std.item() < 0.06)
    high_instability = (rel_spread.item() > 0.16) or (rel_std.item() > 0.14)

    # --- Adaptive hyperparameters ---
    if low_diversity:
        ELITISM_RATE = 0.08
        IMMIGRANTS_RATE = 0.08
        EXPLOIT_RATE = 0.15  # mutated elite-neighborhood offspring
        TEMP = 1.25  # softer selection to promote exploration
        BASE_GAUSS_PROB = 0.020
        BASE_GAUSS_SIGMA = 0.10
        RESET_PROB = 5e-4
        BLX_ALPHA = 0.30
        BLX_RATIO = 0.60
        EXPLOIT_INTENSITY = 0.8
        EXPLORE_INTENSITY = 1.5
    elif high_instability:
        ELITISM_RATE = 0.14
        IMMIGRANTS_RATE = 0.005
        EXPLOIT_RATE = 0.35  # push toward strong regions to lift average
        TEMP = 0.70  # stronger selection
        BASE_GAUSS_PROB = 0.006
        BASE_GAUSS_SIGMA = 0.030
        RESET_PROB = 1e-5
        BLX_ALPHA = 0.08
        BLX_RATIO = 0.20
        EXPLOIT_INTENSITY = 0.5
        EXPLORE_INTENSITY = 0.9
    else:
        ELITISM_RATE = 0.12
        IMMIGRANTS_RATE = 0.015
        EXPLOIT_RATE = 0.25
        TEMP = 0.90
        BASE_GAUSS_PROB = 0.010
        BASE_GAUSS_SIGMA = 0.050
        RESET_PROB = 1e-4
        BLX_ALPHA = 0.15
        BLX_RATIO = 0.35
        EXPLOIT_INTENSITY = 0.6
        EXPLORE_INTENSITY = 1.1

    num_elite = max(1, int(POP_SIZE * ELITISM_RATE))
    num_immigrants = max(0, int(POP_SIZE * IMMIGRANTS_RATE))
    remaining = POP_SIZE - num_elite - num_immigrants
    num_exploit_children = max(0, int(remaining * EXPLOIT_RATE))
    num_crossover_children = max(0, remaining - num_exploit_children)

    # --- Utilities ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def blx_alpha_tensor(p1, p2, alpha):
        low = torch.minimum(p1, p2)
        high = torch.maximum(p1, p2)
        span = high - low
        minv = low - alpha * span
        maxv = high + alpha * span
        return minv + (maxv - minv) * torch.rand_like(p1)

    def parent_biased_blend(child, p1, p2, f1, f2, blx_ratio=BLX_RATIO, blx_alpha=BLX_ALPHA):
        sd1 = p1.state_dict()
        sd2 = p2.state_dict()
        sc = child.state_dict()
        # Compute bias toward better parent
        # Normalize by observed std to be scale-aware; fallback if std small
        scale = float(std.item() if std.item() > 1e-6 else 1.0)
        adv = float((f1 - f2).item()) / (2.0 * scale)
        # bias in [0.35, 0.65] skewed toward better parent
        base_bias = 0.5 + 0.15 * torch.tanh(torch.tensor(adv, device=device)).item()

        with torch.no_grad():
            for k in sc.keys():
                t1 = sd1[k]
                t2 = sd2[k]
                if not (t1.is_floating_point() and t2.is_floating_point()):
                    # Non-floating buffers (e.g., counters) - inherit from better parent
                    sc[k] = t1 if f1 >= f2 else t2
                    continue
                if torch.rand((), device=device) < blx_ratio:
                    sc[k] = blx_alpha_tensor(t1, t2, blx_alpha)
                else:
                    # Per-tensor noisy bias
                    # alpha tensor around base_bias with small noise, clipped to [0.05,0.95]
                    noise = 0.10 * torch.randn_like(t1)
                    alpha_tensor = torch.clamp(base_bias + noise, 0.05, 0.95)
                    sc[k] = alpha_tensor * t1 + (1.0 - alpha_tensor) * t2
        child.load_state_dict(sc)
        return child

    def mutate(model, base_gauss_prob, base_sigma, reset_prob, intensity=1.0):
        with torch.no_grad():
            for p in model.parameters():
                if not p.is_floating_point():
                    continue
                layer_std = torch.std(p).detach()
                # scale-aware sigma
                sigma = base_sigma * (layer_std + 1e-3) * float(intensity)
                gprob = min(0.5, base_gauss_prob * (0.5 + float(intensity)))
                if gprob > 0.0:
                    gmask = torch.rand_like(p) < gprob
                    noise = torch.randn_like(p) * sigma
                    p.add_(noise * gmask)
                rprob = min(0.2, reset_prob * (0.5 + float(intensity)))
                if rprob > 0.0:
                    rmask = torch.rand_like(p) < rprob
                    new_vals = torch.randn_like(p) * (layer_std + 1e-3)
                    p[rmask] = new_vals[rmask]
                # small shrinkage for stability
                p.mul_(1.0 - 0.001 * min(2.0, float(intensity)))
        return model

    # Selection probabilities via softmax (Boltzmann selection)
    # Higher TEMP -> flatter, lower TEMP -> greedier
    logits = scores / float(max(TEMP, 1e-6))
    probs = torch.softmax(logits - torch.max(logits), dim=0)

    def sample_parent_index():
        idx = torch.multinomial(probs, 1).item()
        return idx

    # Rank info for exploitation pool
    sorted_idx = torch.argsort(scores, descending=True)
    top_pool_size = max(2, int(0.30 * POP_SIZE))
    top_pool = sorted_idx[:top_pool_size]

    # --- Build next population ---
    new_population = []

    # Elites (pure clones)
    for i in range(num_elite):
        new_population.append(clone_model(current_population[int(sorted_idx[i].item())]))

    # Exploitation: mutated clones from top pool (micro to moderate intensity)
    for _ in range(num_exploit_children):
        src_idx = int(top_pool[torch.randint(0, top_pool_size, (1,), device=device)].item())
        offspring = clone_model(current_population[src_idx])
        offspring = mutate(offspring, BASE_GAUSS_PROB, BASE_GAUSS_SIGMA, RESET_PROB, intensity=EXPLOIT_INTENSITY)
        new_population.append(offspring)

    # Crossover + mutation children
    for _ in range(num_crossover_children):
        # ensure different parents where possible
        p1_idx = sample_parent_index()
        p2_idx = p1_idx
        attempts = 0
        while (p2_idx == p1_idx) and (attempts < 5):
            p2_idx = sample_parent_index()
            attempts += 1

        parent1 = current_population[p1_idx]
        parent2 = current_population[p2_idx]
        f1 = scores[p1_idx]
        f2 = scores[p2_idx]

        child = SimpleNet().to(device)
        child = parent_biased_blend(child, parent1, parent2, f1, f2)
        child = mutate(child, BASE_GAUSS_PROB, BASE_GAUSS_SIGMA, RESET_PROB, intensity=EXPLORE_INTENSITY)
        new_population.append(child)

    # Immigrants for exploration
    for _ in range(num_immigrants):
        new_population.append(SimpleNet().to(device))

    # Safety adjustments
    if len(new_population) > POP_SIZE:
        new_population = new_population[:POP_SIZE]
    elif len(new_population) < POP_SIZE:
        # fill with additional mutated elite clones
        fill_needed = POP_SIZE - len(new_population)
        for i in range(fill_needed):
            idx = int(sorted_idx[i % len(sorted_idx)].item())
            extra = clone_model(current_population[idx])
            extra = mutate(extra, BASE_GAUSS_PROB, BASE_GAUSS_SIGMA, RESET_PROB, intensity=EXPLOIT_INTENSITY)
            new_population.append(extra)

    return new_population
```
```

Error executing evolved code. Attempting LLM repair (1/1).
--- Calling Openai API to repair operator ---
Error executing evolved code after repair attempts: invalid syntax (<string>, line 1)
Finished evaluation. Operator 6 failed and was assigned a fitness of -inf.

--- Meta-Generation 4 Results ---
Best Operator Fitness (Avg Final Best): -11.3403
Best Performing Operator's Code:


```python
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Holistic evolutionary operator with adaptive diversity control.
    - Elitism with cloning
    - Adaptive tournament size
    - Dynamic crossover strategy (Uniform / BLX-alpha)
    - Adaptive Gaussian and reset mutation
    - Immigrants injection when diversity is low
    """
    POP_SIZE = len(current_population)

    # --- Prepare fitness tensor ---
    scores = fitness_scores
    if not torch.is_tensor(scores):
        scores = torch.tensor(scores, dtype=torch.float32, device=device)
    else:
        scores = scores.to(device=device, dtype=torch.float32)

    # Guard for degenerate populations
    if POP_SIZE == 0:
        return []

    # --- Diversity diagnostics (coefficient of variation) ---
    mean = torch.mean(scores)
    std = torch.std(scores)
    cv = std / (torch.abs(mean) + 1e-8)

    low_diversity = cv.item() < 0.05
    unstable = cv.item() > 0.5

    # --- Adaptive hyperparameters ---
    if low_diversity:
        ELITISM_RATE = 0.08
        TOURNAMENT_SIZE = 3
        GAUSS_MUT_PROB = 0.02
        GAUSS_SIGMA = 0.10
        RESET_MUT_PROB = 5e-4
        IMMIGRANTS_RATE = 0.05
        CROSSOVER_MODE = "blx"  # BLX-alpha crossover to expand search
        BLX_ALPHA = 0.20
    elif unstable:
        ELITISM_RATE = 0.07
        TOURNAMENT_SIZE = 7
        GAUSS_MUT_PROB = 0.006
        GAUSS_SIGMA = 0.03
        RESET_MUT_PROB = 1e-5
        IMMIGRANTS_RATE = 0.01
        CROSSOVER_MODE = "uniform"  # stabilize around good schemas
        BLX_ALPHA = 0.10
    else:
        ELITISM_RATE = 0.10
        TOURNAMENT_SIZE = 5
        GAUSS_MUT_PROB = 0.01
        GAUSS_SIGMA = 0.05
        RESET_MUT_PROB = 1e-4
        IMMIGRANTS_RATE = 0.02
        CROSSOVER_MODE = "mixed"  # combine uniform and arithmetic

    num_elite = max(1, int(POP_SIZE * ELITISM_RATE))
    num_immigrants = max(0, int(POP_SIZE * IMMIGRANTS_RATE))
    num_children = POP_SIZE - num_elite - num_immigrants

    # --- Utilities ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def blx_alpha_tensor(p1, p2, alpha):
        low = torch.minimum(p1, p2)
        high = torch.maximum(p1, p2)
        span = high - low
        minv = low - alpha * span
        maxv = high + alpha * span
        return minv + (maxv - minv) * torch.rand_like(p1)

    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        p1 = parent1.state_dict()
        p2 = parent2.state_dict()
        c = child.state_dict()

        with torch.no_grad():
            for k in c.keys():
                t1 = p1[k]
                t2 = p2[k]
                if CROSSOVER_MODE == "uniform":
                    mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                    c[k] = t1 * mask + t2 * (1.0 - mask)
                elif CROSSOVER_MODE == "blx":
                    c[k] = blx_alpha_tensor(t1, t2, BLX_ALPHA)
                else:  # mixed
                    # 50% uniform mask, 50% arithmetic blend with per-element alpha in [0.25, 0.75]
                    if torch.rand((), device=device) < 0.5:
                        mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                        c[k] = t1 * mask + t2 * (1.0 - mask)
                    else:
                        alpha = 0.25 + 0.5 * torch.rand_like(t1)
                        c[k] = alpha * t1 + (1.0 - alpha) * t2
        child.load_state_dict(c)
        return child

    def mutate(model, gauss_prob=GAUSS_MUT_PROB, gauss_sigma=GAUSS_SIGMA, reset_prob=RESET_MUT_PROB):
        with torch.no_grad():
            for p in model.parameters():
                if gauss_prob > 0.0:
                    gmask = torch.rand_like(p) < gauss_prob
                    noise = torch.randn_like(p) * gauss_sigma
                    p[gmask] = p[gmask] + noise[gmask]
                if reset_prob > 0.0:
                    rmask = torch.rand_like(p) < reset_prob
                    new_vals = torch.randn_like(p)
                    p[rmask] = new_vals[rmask]
        return model

    def tournament_selection(population, scores_tensor, size=TOURNAMENT_SIZE):
        idxs = torch.randint(0, len(population), (size,), device=device)
        sub_scores = scores_tensor[idxs]
        winner_idx = int(idxs[torch.argmax(sub_scores)].item())
        return population[winner_idx]

    # --- Build next population ---
    sorted_idx = torch.argsort(scores, descending=True)
    elites = []
    for i in range(num_elite):
        elites.append(clone_model(current_population[int(sorted_idx[i].item())]))

    new_population = list(elites)

    # Immigrants (fresh random individuals) for diversity
    for _ in range(num_immigrants):
        new_population.append(SimpleNet().to(device))

    # Children via selection, crossover, mutation
    for _ in range(num_children):
        p1 = tournament_selection(current_population, scores)
        # ensure different parents where possible
        p2 = p1
        attempts = 0
        while p2 is p1 and attempts < 3:
            p2 = tournament_selection(current_population, scores)
            attempts += 1

        child = crossover(p1, p2)
        child = mutate(child)
        new_population.append(child)

    # Safety: if rounding errors occur, adjust size
    if len(new_population) > POP_SIZE:
        new_population = new_population[:POP_SIZE]
    elif len(new_population) < POP_SIZE:
        # Fill up with additional mutated clones of elites
        fill_needed = POP_SIZE - len(new_population)
        for i in range(fill_needed):
            extra = clone_model(current_population[int(sorted_idx[i % len(sorted_idx)].item())])
            new_population.append(mutate(extra))

    return new_population
```

Best Operator's Performance Summary:


This operator's performance has been recorded over **3** separate runs.
The table below shows the fitness dynamics, **averaged across all runs**.
Fitness is based on the negative loss on training batches (higher is better).

| Gen | Best Fitness | Avg Fitness  | Worst Fitness | Spread (Diversity) |
|:---:|:------------:|:------------:|:-------------:|:------------------:|
|  0  |     -11.4630 |     -11.5397 |      -11.6121 |             0.1491 |
|  1  |     -11.4313 |     -11.5767 |      -12.1839 |             0.7526 |
|  2  |     -11.4298 |     -11.5916 |      -11.9507 |             0.5209 |
|  3  |     -11.3812 |     -11.6047 |      -11.9569 |             0.5757 |
|  4  |     -11.3890 |     -11.6302 |      -12.2664 |             0.8774 |
|  5  |     -11.3751 |     -11.6719 |      -13.6228 |             2.2477 |
|  6  |     -11.3837 |     -11.6159 |      -12.1176 |             0.7339 |
|  7  |     -11.3677 |     -11.6666 |      -12.3902 |             1.0226 |
|  8  |     -11.3426 |     -11.6645 |      -12.4404 |             1.0978 |
|  9  |     -11.3403 |     -11.6956 |      -13.2220 |             1.8817 |

**Analysis Hints for Your Evolution:**
- **Rate of Improvement:** Analyze the slope of the `Best Fitness` column. A steep, consistent increase is ideal.
- **Population Diversity:** The `Spread (Best-Worst)` column is a proxy for diversity. If it collapses to near-zero too quickly, the population has prematurely converged, and you should consider changes that increase exploration (e.g., higher mutation, different selection).
- **Stability:** Smooth, predictable improvements indicate a stable operator. Jagged or erratic values might suggest the operator is too chaotic.

--- Calling Google API to evolve operator (with generational feedback) ---
--- Calling Openai API to evolve operator (with generational feedback) ---

Meta-Evolution finished!
Final Best Performing Operator:


Overall Fitness (Avg Final Best): -11.3403
Evaluated over 3 runs.
--- Operator Code ---
def generate_next_population(current_population, fitness_scores, device, torch, SimpleNet):
    """
    Holistic evolutionary operator with adaptive diversity control.
    - Elitism with cloning
    - Adaptive tournament size
    - Dynamic crossover strategy (Uniform / BLX-alpha)
    - Adaptive Gaussian and reset mutation
    - Immigrants injection when diversity is low
    """
    POP_SIZE = len(current_population)

    # --- Prepare fitness tensor ---
    scores = fitness_scores
    if not torch.is_tensor(scores):
        scores = torch.tensor(scores, dtype=torch.float32, device=device)
    else:
        scores = scores.to(device=device, dtype=torch.float32)

    # Guard for degenerate populations
    if POP_SIZE == 0:
        return []

    # --- Diversity diagnostics (coefficient of variation) ---
    mean = torch.mean(scores)
    std = torch.std(scores)
    cv = std / (torch.abs(mean) + 1e-8)

    low_diversity = cv.item() < 0.05
    unstable = cv.item() > 0.5

    # --- Adaptive hyperparameters ---
    if low_diversity:
        ELITISM_RATE = 0.08
        TOURNAMENT_SIZE = 3
        GAUSS_MUT_PROB = 0.02
        GAUSS_SIGMA = 0.10
        RESET_MUT_PROB = 5e-4
        IMMIGRANTS_RATE = 0.05
        CROSSOVER_MODE = "blx"  # BLX-alpha crossover to expand search
        BLX_ALPHA = 0.20
    elif unstable:
        ELITISM_RATE = 0.07
        TOURNAMENT_SIZE = 7
        GAUSS_MUT_PROB = 0.006
        GAUSS_SIGMA = 0.03
        RESET_MUT_PROB = 1e-5
        IMMIGRANTS_RATE = 0.01
        CROSSOVER_MODE = "uniform"  # stabilize around good schemas
        BLX_ALPHA = 0.10
    else:
        ELITISM_RATE = 0.10
        TOURNAMENT_SIZE = 5
        GAUSS_MUT_PROB = 0.01
        GAUSS_SIGMA = 0.05
        RESET_MUT_PROB = 1e-4
        IMMIGRANTS_RATE = 0.02
        CROSSOVER_MODE = "mixed"  # combine uniform and arithmetic

    num_elite = max(1, int(POP_SIZE * ELITISM_RATE))
    num_immigrants = max(0, int(POP_SIZE * IMMIGRANTS_RATE))
    num_children = POP_SIZE - num_elite - num_immigrants

    # --- Utilities ---
    def clone_model(model):
        m = SimpleNet().to(device)
        m.load_state_dict(model.state_dict())
        return m

    def blx_alpha_tensor(p1, p2, alpha):
        low = torch.minimum(p1, p2)
        high = torch.maximum(p1, p2)
        span = high - low
        minv = low - alpha * span
        maxv = high + alpha * span
        return minv + (maxv - minv) * torch.rand_like(p1)

    def crossover(parent1, parent2):
        child = SimpleNet().to(device)
        p1 = parent1.state_dict()
        p2 = parent2.state_dict()
        c = child.state_dict()

        with torch.no_grad():
            for k in c.keys():
                t1 = p1[k]
                t2 = p2[k]
                if CROSSOVER_MODE == "uniform":
                    mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                    c[k] = t1 * mask + t2 * (1.0 - mask)
                elif CROSSOVER_MODE == "blx":
                    c[k] = blx_alpha_tensor(t1, t2, BLX_ALPHA)
                else:  # mixed
                    # 50% uniform mask, 50% arithmetic blend with per-element alpha in [0.25, 0.75]
                    if torch.rand((), device=device) < 0.5:
                        mask = (torch.rand_like(t1) < 0.5).to(t1.dtype)
                        c[k] = t1 * mask + t2 * (1.0 - mask)
                    else:
                        alpha = 0.25 + 0.5 * torch.rand_like(t1)
                        c[k] = alpha * t1 + (1.0 - alpha) * t2
        child.load_state_dict(c)
        return child

    def mutate(model, gauss_prob=GAUSS_MUT_PROB, gauss_sigma=GAUSS_SIGMA, reset_prob=RESET_MUT_PROB):
        with torch.no_grad():
            for p in model.parameters():
                if gauss_prob > 0.0:
                    gmask = torch.rand_like(p) < gauss_prob
                    noise = torch.randn_like(p) * gauss_sigma
                    p[gmask] = p[gmask] + noise[gmask]
                if reset_prob > 0.0:
                    rmask = torch.rand_like(p) < reset_prob
                    new_vals = torch.randn_like(p)
                    p[rmask] = new_vals[rmask]
        return model

    def tournament_selection(population, scores_tensor, size=TOURNAMENT_SIZE):
        idxs = torch.randint(0, len(population), (size,), device=device)
        sub_scores = scores_tensor[idxs]
        winner_idx = int(idxs[torch.argmax(sub_scores)].item())
        return population[winner_idx]

    # --- Build next population ---
    sorted_idx = torch.argsort(scores, descending=True)
    elites = []
    for i in range(num_elite):
        elites.append(clone_model(current_population[int(sorted_idx[i].item())]))

    new_population = list(elites)

    # Immigrants (fresh random individuals) for diversity
    for _ in range(num_immigrants):
        new_population.append(SimpleNet().to(device))

    # Children via selection, crossover, mutation
    for _ in range(num_children):
        p1 = tournament_selection(current_population, scores)
        # ensure different parents where possible
        p2 = p1
        attempts = 0
        while p2 is p1 and attempts < 3:
            p2 = tournament_selection(current_population, scores)
            attempts += 1

        child = crossover(p1, p2)
        child = mutate(child)
        new_population.append(child)

    # Safety: if rounding errors occur, adjust size
    if len(new_population) > POP_SIZE:
        new_population = new_population[:POP_SIZE]
    elif len(new_population) < POP_SIZE:
        # Fill up with additional mutated clones of elites
        fill_needed = POP_SIZE - len(new_population)
        for i in range(fill_needed):
            extra = clone_model(current_population[int(sorted_idx[i % len(sorted_idx)].item())])
            new_population.append(mutate(extra))

    return new_population