In [None]:
from dotenv import load_dotenv

# Read key/value pairs from a local .env file into the process environment.
# override=True makes values from .env take precedence over variables that
# are already set in the environment.
load_dotenv(override=True)

In [None]:
import os

# Hugging Face access token, read from the environment (populated from .env
# by the cell above). Fail fast, with an actionable message, when it is
# missing or empty instead of failing later during the model download.
HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
assert HF_TOKEN, (
    "HUGGINGFACE_TOKEN is not set; add it to your .env file or export it "
    "in the environment before running this notebook"
)

In [None]:
import math
import os
import re
from dataclasses import dataclass
from typing import List

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
@dataclass
class SearchConfig:
    """
    Settings for one evolutionary search run.

    Attributes:
        model_id: Hugging Face model identifier to load.
        population_size: Number of candidates; the agent generates this many
            offspring per generation and prunes the population back to this
            many programs afterwards. Must be at least 1.
        num_generations: How many generations to run. Must be >= 0.
        num_parent_context: How many of the best programs are added to the
            LLM context for generation (in paper terms, the "inspiration"
            given to the LLM). Must be >= 0; 0 disables inspirations.

    Raises:
        ValueError: If any field is outside its valid range.
    """

    model_id: str
    population_size: int
    num_generations: int
    num_parent_context: int

    def __post_init__(self):
        # Reject nonsensical settings up front: e.g. population_size=0 would
        # prune the population to nothing and crash later with an IndexError.
        if not self.model_id:
            raise ValueError("model_id must be a non-empty string")
        if self.population_size < 1:
            raise ValueError(
                f"population_size must be >= 1, got {self.population_size}"
            )
        if self.num_generations < 0:
            raise ValueError(
                f"num_generations must be >= 0, got {self.num_generations}"
            )
        if self.num_parent_context < 0:
            raise ValueError(
                f"num_parent_context must be >= 0, got {self.num_parent_context}"
            )


search_config = SearchConfig(
    model_id="google/gemma-2b-it",
    population_size=5,
    num_generations=10,
    num_parent_context=2,
)

In [None]:
@dataclass
class Program:
    """
    A single candidate solution (an 'Individual' in evolutionary terms).
    AlphaEvolve stores these in a Program Database.
    """

    # Source code of the candidate.
    code: str
    # Score of the candidate; higher is better. Defaults to negative infinity
    # (a plain Python float) so that a program without a score ranks below
    # every program that has one.
    fitness: float = float("-inf")

    def __repr__(self):
        # Only the score is shown; the code itself is left out of the repr.
        score = self.fitness
        return f"Program(fitness={score:.4f})"

In [None]:
class Evaluator:
    """
    The automated evaluator that assigns a scalar score to code.
    In this demo, we want the agent to discover the function: f(x) = x^2 + 2x + 1
    """

    def __init__(self, test_inputs=None, target_func=None):
        """
        Build the ground-truth (x, y) pairs that candidates are scored against.

        Args:
            test_inputs: Optional iterable of numeric inputs. Defaults to
                [-5, -2, 0, 2, 5, 10].
            target_func: Optional callable mapping an input to its expected
                output. Defaults to f(x) = x^2 + 2x + 1.
        """
        if test_inputs is None:
            test_inputs = [-5, -2, 0, 2, 5, 10]
        if target_func is None:
            target_func = self._default_target

        # Ground truth data (x, y) pairs
        self.test_inputs = list(test_inputs)
        self.test_targets = [target_func(x) for x in self.test_inputs]

    @staticmethod
    def _default_target(x):
        """Hidden function of the demo task: f(x) = x^2 + 2x + 1."""
        return x**2 + 2 * x + 1

    def evaluate(self, code_str: str) -> float:
        """
        Executes the candidate code and scores it against the ground truth.

        The code must define a callable named 'solve' taking one argument.
        Higher fitness is better: fitness = -(sum of squared errors), so a
        perfect candidate scores 0.

        Args:
            code_str: Python source code of the candidate.

        Returns:
            The negated sum of squared errors, or -inf when the code fails to
            run, does not define 'solve', returns a non-numeric value, or
            produces an error that is not a finite number.
        """
        failure = -float("inf")

        # One dict serves as both globals and locals. With two separate dicts
        # exec() runs the code like a class body, so imports and helper
        # functions defined at the top level of the candidate would be
        # invisible from inside 'solve' (NameError at call time).
        namespace = {}

        try:
            # SECURITY: exec runs arbitrary, LLM-generated code inside this
            # process with no sandbox and no timeout.
            # TODO: find an alternative to exec, should be fine for
            # offline runs though!
            exec(code_str, namespace)

            # We expect the LLM to define a function named 'solve'
            candidate_func = namespace.get("solve")
            if not callable(candidate_func):
                return failure

            # Accumulate the sum of squared errors over all test points
            total_error = 0
            for x, target in zip(self.test_inputs, self.test_targets):
                prediction = candidate_func(x)
                if not isinstance(prediction, (int, float)):
                    return failure
                total_error += (prediction - target) ** 2

            # NaN compares false against everything and would corrupt the
            # fitness-based sorting of the population; treat it (and any
            # other non-finite error) as a failed candidate. An integer too
            # large for a float raises OverflowError here and is handled by
            # the except clause below.
            if not math.isfinite(total_error):
                return failure

            # Return negative error (maximization problem)
            return -total_error

        except Exception:
            # Code that crashes gets the lowest fitness
            return failure

In [None]:
class AlphaEvolveAgent:
    """
    Minimal evolutionary search loop driven by an LLM.

    The agent keeps a population of Program objects sorted by fitness. Each
    step it asks the model to rewrite the best program, scores the rewrites
    with an Evaluator and keeps the best `config.population_size` programs.
    """

    # A complete fenced block, with or without a python/py language tag.
    _FENCED_BLOCK = re.compile(
        r"```(?:python|py)?[^\S\n]*\n(.*?)```", re.DOTALL | re.IGNORECASE
    )
    # Any fence marker (plus optional language tag) left in unfenced or
    # truncated output.
    _FENCE_MARKER = re.compile(r"```(?:python|py)?[^\S\n]*\n?", re.IGNORECASE)

    def __init__(self, config: "SearchConfig", torch_dtype: torch.dtype = torch.float16):
        """
        Load the tokenizer and model named in `config` and create an empty
        population.

        Args:
            config: Search settings (model id, population size, ...).
            torch_dtype: dtype the model weights are loaded in.
        """
        self.config = config

        print(f"Loading {config.model_id}...")
        # Forward the token from the environment so gated repositories can be
        # downloaded. When the variable is unset this is None and the library
        # falls back to its default credential lookup.
        hf_token = os.getenv("HUGGINGFACE_TOKEN")
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.config.model_id, token=hf_token
        )
        # device_map="auto" leaves device placement to the library, so the
        # rest of the class asks the model for its device instead of assuming
        # CUDA.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.config.model_id,
            device_map="auto",
            torch_dtype=torch_dtype,
            token=hf_token,
        )
        self.evaluator = Evaluator()
        self.population: "List[Program]" = []

    def seed_population(self, initial_code: str):
        """Initialize the database with a user-provided starting point."""
        fitness = self.evaluator.evaluate(initial_code)
        self.population.append(Program(code=initial_code, fitness=fitness))
        print(f"Seeded with fitness: {fitness}")

    def construct_prompt(self, parent: "Program", inspirations: "List[Program]") -> str:
        """
        Builds the 'Rich Context' prompt.
        It includes 'Prior programs' (inspirations) and the 'Current program' (parent) to mutate.

        Returns:
            The prompt rendered through the tokenizer's chat template, as text.
        """

        # 1. Context: Show high-performing past solutions
        prompt_content = "You are an intelligent coding assistant. Your goal is to optimize a Python function to match a hidden mathematical pattern.\n\n"

        if inspirations:
            prompt_content += "--- Prior Best Solutions ---\n"
            for p in inspirations:
                prompt_content += f"Score: {p.fitness}\nCode:\n{p.code}\n\n"

        # 2. Task: Present the parent code to modify
        prompt_content += "--- Current Code to Improve ---\n"
        prompt_content += f"{parent.code}\n\n"

        prompt_content += "--- Task ---\n"
        prompt_content += "Rewrite the 'Current Code' to improve its accuracy. "
        prompt_content += "Think about the pattern in the Prior Solutions. "
        prompt_content += "Output ONLY the full Python code for the 'solve' function. No markdown, no explanation."

        # Format for Gemma (Chat Template)
        messages = [{"role": "user", "content": prompt_content}]
        return self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

    def extract_code(self, llm_response: str) -> str:
        """
        Parses the LLM output to extract executable Python code.

        Handles a fenced markdown block (tagged `python`, `py` or untagged),
        and falls back to the whole response with any fence markers removed,
        which also covers output truncated before the closing fence.
        """
        match = self._FENCED_BLOCK.search(llm_response)
        if match:
            return match.group(1).strip()

        # If no complete block exists, assume the whole response is code.
        # Removing the fence together with its language tag avoids leaving a
        # stray `python` line at the top of the code.
        return self._FENCE_MARKER.sub("", llm_response).strip()

    # TODO: implement some form of early stopping in case fitness doesn't
    # improve after a fixed number of steps
    @torch.no_grad()
    def llm_mutate(
        self,
        parent: "Program",
        inspirations: "List[Program]",
        max_new_tokens: int = 256,
        temperature: float = 0.7,
    ) -> str:
        """
        Uses the LLM to propose a rewrite of the parent code.

        Args:
            parent: Program to improve.
            inspirations: Programs shown to the model as prior solutions.
            max_new_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature (higher means more diverse).

        Returns:
            The code extracted from the model's response.
        """
        prompt = self.construct_prompt(parent, inspirations)
        # The chat template already emitted the special tokens as text;
        # add_special_tokens=False keeps the tokenizer from prepending a
        # second BOS. Inputs go to wherever the model was actually placed.
        inputs = self.tokenizer(
            prompt, return_tensors="pt", add_special_tokens=False
        ).to(self.model.device)

        outputs = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,  # High temp for diversity/exploration
            do_sample=True,
        )

        # generate() returns prompt + continuation; slice on token ids rather
        # than on decoded text, which need not round-trip exactly.
        prompt_length = inputs["input_ids"].shape[1]
        response_text = self.tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )

        return self.extract_code(response_text)

    def step(self, generation_idx: int):
        """
        Runs one iteration of the evolutionary loop.

        Raises:
            RuntimeError: If the population is empty (seed_population was not
                called first).
        """
        if not self.population:
            raise RuntimeError(
                "Population is empty; call seed_population() before step()."
            )

        print(f"\n--- Generation {generation_idx} ---")

        # Sort population by fitness (descending)
        self.population.sort(key=lambda p: p.fitness, reverse=True)

        # Keep top K as "Inspirations" for the prompt (Elitism)
        inspirations = self.population[: self.config.num_parent_context]

        new_programs = []

        # Generate offspring
        # We take the best parent and try to mutate it multiple times
        parent = self.population[0]

        for _ in range(self.config.population_size):
            print(f"  > Mutating parent (Fitness: {parent.fitness})...", end="")

            try:
                # 1. LLM Mutation
                mutated_code = self.llm_mutate(parent, inspirations)

                # 2. Evaluation
                fitness = self.evaluator.evaluate(mutated_code)
                print(f" Result Fitness: {fitness}")

                # 3. Add to pool
                new_programs.append(Program(code=mutated_code, fitness=fitness))

            except Exception as e:
                # Best effort: one failed generation must not end the search.
                print(f" Failed: {e}")

        # Update Population (Join and Select)
        self.population.extend(new_programs)
        self.population.sort(key=lambda p: p.fitness, reverse=True)
        # Prune to fixed size, keeping only the best ones in terms of fitness
        self.population = self.population[: self.config.population_size]

        print(f"Best in Gen {generation_idx}: {self.population[0].fitness}")

In [None]:
# starting seed program
# The function body has to be indented inside the string: this text is
# executed verbatim by the evaluator, and an unindented body is an
# IndentationError, which would score the seed as -inf.
initial_heuristic = """
def solve(x):
    # Initial guess: linear relationship
    return x * 2
"""

# Fix the sampling seed so a fresh "Restart & Run All" explores the same
# candidates (up to backend non-determinism).
SEED = 0
torch.manual_seed(SEED)

agent = AlphaEvolveAgent(search_config)
agent.seed_population(initial_heuristic)

for gen in range(1, search_config.num_generations + 1):
    agent.step(gen)

print("\n=== Final Discovered Solution ===")
print(agent.population[0].code)