In [2]:
import os
import random
import subprocess
import time

## Evaluating One Candidate

In [1]:
def apply_passes(input_bc, output_bc, pass_order):
    """Run ``opt`` on ``input_bc`` with the given pass sequence.

    The names in ``pass_order`` are comma-joined into a single ``-passes=``
    argument and the result is written to ``output_bc``.

    Raises:
        subprocess.CalledProcessError: if ``opt`` exits with a non-zero status.
    """
    pipeline_flag = "-passes=" + ",".join(pass_order)
    subprocess.run(["opt", pipeline_flag, input_bc, "-o", output_bc], check=True)

def compile_to_binary(output_bc, output_bin):
    """Compile the file ``output_bc`` into the executable ``output_bin`` with ``clang``.

    Raises:
        subprocess.CalledProcessError: if ``clang`` exits with a non-zero status.
    """
    subprocess.run(["clang", output_bc, "-o", output_bin], check=True)

def measure_runtime(binary):
    """Run ``binary`` once and return its wall-clock duration in seconds.

    ``binary`` may be a relative or an absolute path. It is resolved to an
    absolute path before launching, so a bare file name is never looked up on
    ``PATH`` and an absolute path is not turned into a cwd-relative one by a
    ``./`` prefix.

    Raises:
        subprocess.CalledProcessError: if the program exits with a non-zero
            status.
    """
    executable = os.path.abspath(binary)
    # perf_counter is monotonic; time.time can jump if the system clock is adjusted.
    start = time.perf_counter()
    subprocess.run([executable], check=True)
    return time.perf_counter() - start

def measure_size(binary):
    """Return the total size reported by the ``size`` tool for ``binary``.

    The second output line is parsed using the header on the first line. When
    the header has a ``dec`` column, that value is returned: it is the total
    the tool itself reports, so per-section columns are not added on top of it.
    If there is no ``dec`` column, the first four numeric columns are summed.

    Returns:
        The size as an ``int``, or ``None`` when the output has fewer than two
        lines or the expected fields cannot be parsed as integers.
    """
    result = subprocess.run(["size", binary], capture_output=True, text=True)
    lines = result.stdout.strip().split("\n")
    if len(lines) < 2:
        return None
    header = lines[0].split()
    size_fields = lines[1].split()
    try:
        if "dec" in header:
            # Summing the section columns together with "dec" would count the
            # sections twice on layouts where "dec" is among the first four.
            return int(size_fields[header.index("dec")])
        return sum(int(x) for x in size_fields[:4])
    except (ValueError, IndexError):
        return None

def evaluate_candidate(pass_order, input_bc, workdir, mode="runtime"):
    """Build one candidate pass ordering and return its fitness (lower is better).

    The passes are applied to ``input_bc``, the result is compiled to a binary
    inside ``workdir``, and the binary is then either timed (``mode="runtime"``)
    or measured for size (``mode="size"``).

    Args:
        pass_order: sequence of pass names, in the order they should run.
        input_bc: path to the input file handed to ``opt``.
        workdir: directory for the intermediate files; created if missing.
        mode: ``"runtime"`` or ``"size"``.

    Returns:
        The measured fitness, or ``float("inf")`` when any subprocess step
        fails or the size could not be determined.

    Raises:
        ValueError: if ``mode`` is not ``"runtime"`` or ``"size"``.
    """
    # Validate up front so a bad mode is reported before running the toolchain
    # and cannot be masked by a compile failure.
    if mode not in ("runtime", "size"):
        raise ValueError("Invalid mode")

    # Without the directory every candidate would fail and silently score inf.
    os.makedirs(workdir, exist_ok=True)
    candidate_bc = f"{workdir}/candidate.bc"
    candidate_bin = f"{workdir}/candidate_bin"
    try:
        apply_passes(input_bc, candidate_bc, pass_order)
        compile_to_binary(candidate_bc, candidate_bin)
        if mode == "runtime":
            fitness = measure_runtime(candidate_bin)
        else:
            fitness = measure_size(candidate_bin)
    except subprocess.CalledProcessError:
        return float("inf")  # Penalize failure

    # measure_size signals unparseable output with None; keep fitness values
    # numeric so callers can sort them.
    if fitness is None:
        return float("inf")
    return fitness

## Genetic Algorithm Operators

In [None]:

def random_candidate(pass_pool, max_len=5):
    """Build a random pass sequence of between 1 and ``max_len`` entries.

    Each entry is drawn independently from ``pass_pool``, so repeats are
    possible.
    """
    target_len = random.randint(1, max_len)
    picks = []
    while len(picks) < target_len:
        picks.append(random.choice(pass_pool))
    return picks


def crossover(parent1, parent2, max_len=5):
    """Splice a head of ``parent1`` onto a tail of ``parent2``.

    A cut point is drawn independently for each parent. The child is
    everything before the cut in ``parent1`` followed by everything from the
    cut onward in ``parent2``, truncated to at most ``max_len`` entries.
    """
    head_end = random.randint(1, len(parent1))
    tail_start = random.randint(1, len(parent2))

    offspring = parent1[:head_end] + parent2[tail_start:]

    # Slicing is a no-op when the child is already short enough.
    return offspring[:max_len]


def mutate(candidate, pass_pool, mutation_rate=0.1, max_len=5):
    """Return a mutated copy of ``candidate``; the input list is not modified.

    Three kinds of mutation are attempted, each gated by ``mutation_rate``:

    * every gene may be replaced by a random pass from ``pass_pool``;
    * one random pass may be inserted if the result is shorter than ``max_len``;
    * one random gene may be removed if more than one gene remains.
    """
    def should_mutate():
        return random.random() < mutation_rate

    # Per-gene replacement.
    result = [
        random.choice(pass_pool) if should_mutate() else gene
        for gene in candidate
    ]

    # Optional single insertion, never growing past max_len.
    if len(result) < max_len and should_mutate():
        slot = random.randint(0, len(result))
        result.insert(slot, random.choice(pass_pool))

    # Optional single deletion, never emptying the sequence.
    if len(result) > 1 and should_mutate():
        victim = random.randint(0, len(result) - 1)
        result.pop(victim)

    return result


## Running the GA loop

In [4]:
def run_ga(input_bc, pass_pool, generations=10, pop_size=20, seq_length=5,
           workdir="/tmp/workdir", mode="runtime"):
    """Search for a good pass ordering with a simple genetic algorithm.

    Each generation evaluates every candidate, keeps the better half as
    parents, and refills the population with mutated crossover children.

    Args:
        input_bc: path to the input file passed to ``evaluate_candidate``.
        pass_pool: list of pass names candidates are built from.
        generations: number of generations to run.
        pop_size: number of candidates per generation (at least 2).
        seq_length: maximum length of a candidate pass sequence.
        workdir: directory handed to ``evaluate_candidate`` for scratch files.
        mode: fitness mode handed to ``evaluate_candidate``.

    Returns:
        ``(fitness, candidate)`` for the best candidate seen in any
        generation, or ``None`` if no generation was run.

    Raises:
        ValueError: if ``pop_size`` is smaller than 2 (two parents are needed).
    """
    if pop_size < 2:
        raise ValueError("pop_size must be at least 2")

    population = [random_candidate(pass_pool, seq_length) for _ in range(pop_size)]
    best = None

    for generation in range(generations):
        print(f"Generation {generation}")
        fitnesses = []
        for candidate in population:
            fitness = evaluate_candidate(candidate, input_bc, workdir, mode)
            if fitness is None:
                # Keep every fitness numeric so the sort below cannot fail.
                fitness = float("inf")
            fitnesses.append((fitness, candidate))
            print(f"Candidate {candidate} => Fitness {fitness}")

        # Selection
        fitnesses.sort(key=lambda x: x[0])
        # The population is fully replaced each generation, so remember the
        # best candidate seen so far instead of relying on the last generation.
        if best is None or fitnesses[0][0] < best[0]:
            best = fitnesses[0]
        # Always keep at least two survivors so two parents can be sampled.
        survivors = [cand for _, cand in fitnesses[:max(2, pop_size // 2)]]

        # Crossover and mutation
        new_population = []
        while len(new_population) < pop_size:
            first, second = random.sample(survivors, 2)
            # crossover returns a single child, so call it once per direction
            # to obtain two children from each pair of parents.
            for head, tail in ((first, second), (second, first)):
                child = crossover(head, tail, seq_length)
                new_population.append(mutate(child, pass_pool, max_len=seq_length))

        # Children are produced in pairs; trim the overshoot for odd pop_size.
        population = new_population[:pop_size]

    # Final best
    if best is None:
        print("No generations were run")
        return None
    print(f"Best candidate: {best[1]} with fitness {best[0]}")
    return best


## Example usage

In [None]:
# Pass names the genetic algorithm draws from when building and mutating
# candidate sequences; each name ends up in the comma-joined `-passes=`
# argument given to `opt`.
PASS_POOL = [
    "inline",
    "loop-unroll",
    "gvn",
    "adce",
    "instcombine",
    "simplifycfg",
    "reassociate",
    "licm",
]

# Run the search with the default settings.
# NOTE(review): "foo.bc" looks like a placeholder input path — confirm the
# real file before running; this call launches external tools via subprocess.
run_ga("foo.bc", PASS_POOL)