In [2]:
import subprocess
import time
import random
import pandas as pd

## Evaluating One Candidate

In [118]:
def apply_passes(input, pass_order):
    """Run ``./compile.sh`` from ``./beebs`` for one benchmark and flag list.

    Args:
        input: First argument handed to ``compile.sh`` (the notebook passes a
            benchmark name such as ``"edn"``).
        pass_order: Iterable of extra command-line arguments appended after
            ``input``, in order.

    Returns:
        The ``subprocess.CompletedProcess`` of the script, with stdout/stderr
        captured as text.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status (``check=True``).
    """
    command = ["./compile.sh", input, *pass_order]
    return subprocess.run(
        command,
        cwd="./beebs",
        check=True,
        text=True,
        capture_output=True,
    )

def _measure_runtime(binary, count=50, mode="cycles"):
    total_score = 0.0
    base_score = 0.0
    for _ in range(count):
        # start = time.time()
        result = subprocess.run(
            ["perf", "stat", "-e", "cycles", f"./{binary}/{binary}"],
            capture_output=True,
            text=True,
            cwd="./beebs/src"
        )
        
        lines = result.stderr.splitlines()
        cycles_line = next((line for line in lines if "cycles" in line), None)
        time_line = next((line for line in lines if "seconds time elapsed" in line), None)

        result_base = subprocess.run(
            ["perf", "stat", "-e", "cycles", f"./{binary}/{binary}_base"],
            capture_output=True,
            text=True,
            cwd="./beebs/src"
        )
        # Extract cycles and time from stderr
        lines_base = result_base.stderr.splitlines()
        cycles_line_base = next((line for line in lines_base if "cycles" in line), None)
        time_line_base = next((line for line in lines_base if "seconds time elapsed" in line), None)

        if cycles_line and time_line and cycles_line_base and time_line_base:
            cycles = int(cycles_line.split()[0].replace(",", ""))
            cycles_base = int(cycles_line_base.split()[0].replace(",", ""))

            elapsed_time = float(time_line.split()[0])
            elapsed_time_base = float(time_line_base.split()[0])

            if mode == "time":
                # Use elapsed time directly
                total_score += elapsed_time
                base_score += elapsed_time_base
            elif mode == "cycles":
                # Use cycles as the score
                total_score += cycles
                base_score += cycles_base
    return (total_score - base_score) / count

def measure_runtime_alt(binary, count=10, mode="cycles"):
    """Average absolute perf-stat score of ``./<binary>/<binary>``.

    Runs ``perf stat -e cycles`` on the benchmark (relative to
    ``./beebs/src``) ``count`` times and parses perf's report from stderr.
    Unlike the baseline-relative variants, no ``_base`` binary is involved.

    Args:
        binary: Benchmark name; used for both the directory and the file name.
        count: Number of measurement iterations to attempt.
        mode: ``"cycles"`` to score by the cycle counter, ``"time"`` to score
            by the "seconds time elapsed" value.

    Returns:
        Mean score over the iterations whose perf output could be parsed, or
        ``float("inf")`` when none could.

    Raises:
        ValueError: If ``mode`` is neither ``"cycles"`` nor ``"time"``.
    """
    if mode not in ("cycles", "time"):
        raise ValueError(f"Unsupported mode: {mode!r}")

    total_score = 0.0
    valid = 0
    for _ in range(count):
        result = subprocess.run(
            ["perf", "stat", "-e", "cycles", f"./{binary}/{binary}"],
            capture_output=True,
            text=True,
            cwd="./beebs/src",
        )

        lines = result.stderr.splitlines()
        cycles_line = next((line for line in lines if "cycles" in line), None)
        time_line = next((line for line in lines if "seconds time elapsed" in line), None)
        if not (cycles_line and time_line):
            continue

        try:
            cycles = int(cycles_line.split()[0].replace(",", ""))
            elapsed_time = float(time_line.split()[0])
        except ValueError:
            # The first field was not a number, so the sample is unusable.
            continue

        total_score += cycles if mode == "cycles" else elapsed_time
        valid += 1

    if valid == 0:
        # No usable sample: report the worst score rather than 0.0.
        return float("inf")
    # Average over the samples actually accumulated, not the attempts.
    return total_score / valid

def measure_runtime(binary, count=10, mode="cycles"):
    """Average self-reported cycle count of a benchmark relative to its baseline.

    Each iteration executes ``./<binary>/<binary>`` and
    ``./<binary>/<binary>_base`` (relative to ``./beebs/src``) and reads the
    integer that follows ``Cycles:`` in each program's stdout.

    Args:
        binary: Benchmark name; used for both the directory and the file name.
        count: Number of measurement iterations to attempt.
        mode: Only ``"cycles"`` is supported.

    Returns:
        Mean of (candidate cycles - baseline cycles) over the iterations in
        which both programs printed a parsable cycle count. Negative means
        the candidate used fewer cycles than the baseline. ``float("inf")``
        when no iteration produced a usable pair, so a broken binary is
        penalised instead of scoring 0.0.

    Raises:
        ValueError: If ``mode`` is not ``"cycles"``.
    """
    if mode != "cycles":
        raise ValueError(f"Unsupported mode: {mode!r}")

    total_score = 0.0
    total_base = 0.0
    valid = 0
    for _ in range(count):
        result = subprocess.run(
            [f"./{binary}/{binary}"],
            capture_output=True,
            text=True,
            cwd="./beebs/src",
        )
        result_base = subprocess.run(
            [f"./{binary}/{binary}_base"],
            capture_output=True,
            text=True,
            cwd="./beebs/src",
        )

        cycles = _parse_cycles(result.stdout)
        base = _parse_cycles(result_base.stdout)
        if cycles is None or base is None:
            # Either program failed to report; drop the pair so the two
            # running totals stay comparable.
            continue

        total_score += cycles
        total_base += base
        valid += 1

    if valid == 0:
        return float("inf")
    # Average over the pairs actually accumulated, not the attempts.
    return (total_score - total_base) / valid


def _parse_cycles(stdout):
    """Return the integer following the first ``Cycles:`` marker, else None."""
    _, marker, tail = stdout.partition("Cycles:")
    fields = tail.split()
    if not marker or not fields:
        return None
    try:
        return int(fields[0])
    except ValueError:
        return None

def measure_size(binary):
    """Return the total size reported by the ``size`` utility for ``binary``.

    The output is expected to be a header row followed by one data row. The
    total is read from the column whose header is ``dec``, which already holds
    the sum of the section sizes; adding the section columns to it would
    double-count.

    Args:
        binary: Path passed to ``size`` as its only argument.

    Returns:
        The integer in the ``dec`` column, or ``None`` when ``size`` produced
        fewer than two lines of output (for example because the file does not
        exist). If the header has no ``dec`` column, falls back to summing the
        first four numeric fields.
    """
    result = subprocess.run(["size", binary], capture_output=True, text=True)
    lines = result.stdout.strip().split("\n")
    if len(lines) < 2:
        return None

    header = lines[0].split()
    size_fields = lines[1].split()
    if "dec" in header:
        dec_index = header.index("dec")
        if dec_index < len(size_fields):
            return int(size_fields[dec_index])

    # Unknown layout: keep the historical behaviour.
    return sum(int(x) for x in size_fields[:4])

def evaluate_candidate(pass_order, input, workdir, mode="runtime"):
    """Build one candidate flag sequence and return its fitness (lower is better).

    Args:
        pass_order: Flags forwarded to ``apply_passes``.
        input: Benchmark name forwarded to ``apply_passes`` and to the
            measurement function.
        workdir: Accepted for interface compatibility; not used.
        mode: ``"runtime"`` scores with ``measure_runtime``; ``"size"`` scores
            with ``measure_size``.

    Returns:
        The measured fitness, or ``float("inf")`` when the build fails or no
        size could be obtained.

    Raises:
        ValueError: If ``mode`` is neither ``"runtime"`` nor ``"size"``
            (raised after the build step).
    """
    try:
        apply_passes(input, pass_order)
        if mode == "runtime":
            fitness = measure_runtime(input)
        elif mode == "size":
            # `size` runs from the notebook directory, so it needs the path of
            # the built binary rather than the bare benchmark name.
            # NOTE(review): assumes the binary lives where measure_runtime
            # executes it (./beebs/src/<name>/<name>) - confirm against compile.sh.
            fitness = measure_size(f"./beebs/src/{input}/{input}")
        else:
            raise ValueError("Invalid mode")
    except subprocess.CalledProcessError:
        return float("inf")  # Penalize failure

    # measure_size returns None when it cannot read a size; map that to the
    # failure score so callers can keep comparing fitness values with `<`.
    return float("inf") if fitness is None else fitness

In [None]:
# Single-iteration check of measure_runtime on crc32. It executes both
# ./beebs/src/crc32/crc32 and ./beebs/src/crc32/crc32_base, so both must
# already have been built.
measure_runtime("crc32", 1)

In [120]:
# Invoke compile.sh for dijkstra with two extra flags; the cell displays the
# returned CompletedProcess.
apply_passes("dijkstra", ["-O2", "-funroll-loops"])



## Genetic Algorithm Operators

In [6]:

def random_candidate(pass_pool, max_len=5):
    """Draw a random sequence of distinct passes from ``pass_pool``.

    Args:
        pass_pool: Sequence of available passes.
        max_len: Desired sequence length.

    Returns:
        A new list of ``max_len`` distinct passes in random order, or of
        ``len(pass_pool)`` passes when the pool is smaller than ``max_len``.
    """
    # random.sample raises ValueError when asked for more items than the pool
    # holds, so clamp the request to the pool size.
    length = min(max_len, len(pass_pool))
    return random.sample(pass_pool, length)



def crossover(parent1, parent2, max_len=5):
    """Splice a prefix of ``parent1`` onto a suffix of ``parent2``.

    Two independent cut points are drawn, one per parent, each between 1 and
    that parent's length inclusive. The child is ``parent1`` up to its cut
    followed by ``parent2`` from its cut onward, with repeated passes removed
    (first occurrence wins) and the result truncated to ``max_len``.

    Args:
        parent1: List supplying the head of the child.
        parent2: List supplying the tail of the child.
        max_len: Maximum length of the returned child.

    Returns:
        A new list of unique passes no longer than ``max_len``.
    """
    head_end = random.randint(1, len(parent1))
    tail_start = random.randint(1, len(parent2))
    spliced = parent1[:head_end] + parent2[tail_start:]

    # dict keys keep first-seen order, which drops repeats without reordering
    unique = list(dict.fromkeys(spliced))
    return unique[:max_len]


def mutate(candidate, pass_pool, mutation_rate=0.1, max_len=5):
    """Return a mutated copy of ``candidate``; the input is left untouched.

    Three independent mutations are attempted in order, each firing when a
    fresh ``random.random()`` draw is below ``mutation_rate``:

    1. substitution - overwrite one random position with a pass not already
       in the sequence (skipped for an empty sequence);
    2. insertion - add an unused pass at a random position, only while the
       sequence is shorter than ``max_len``;
    3. deletion - remove one random position, only while more than one pass
       remains.

    Args:
        candidate: Sequence of passes to mutate.
        pass_pool: All passes that may be introduced.
        mutation_rate: Threshold compared against each random draw.
        max_len: Length at which insertion stops being attempted.

    Returns:
        The mutated copy.
    """
    result = candidate[:]

    def unused_passes():
        # Pool entries not currently present in the working copy.
        return list(set(pass_pool) - set(result))

    def fires():
        return random.random() < mutation_rate

    # 1. substitution
    if result and fires():
        slot = random.randint(0, len(result) - 1)
        choices = unused_passes()
        if choices:
            result[slot] = random.choice(choices)

    # 2. insertion
    if len(result) < max_len and fires():
        choices = unused_passes()
        if choices:
            where = random.randint(0, len(result))
            result.insert(where, random.choice(choices))

    # 3. deletion
    if len(result) > 1 and fires():
        del result[random.randint(0, len(result) - 1)]

    return result



## Running the GA loop

In [7]:

def run_ga(input_bc, pass_pool, generations=10, pop_size=20, seq_length=5, mutation_rate=0.1):
    """Search for a low-fitness flag sequence with a simple genetic algorithm.

    Before the search, the benchmark is built with no extra flags and the
    resulting ``./beebs/src/<input_bc>/<input_bc>`` is renamed to
    ``<input_bc>_base``. Each generation then evaluates every candidate,
    keeps the better half (lowest fitness) as parents, and refills the
    population with mutated crossover children.

    Args:
        input_bc: Benchmark name passed to ``apply_passes`` and
            ``evaluate_candidate``.
        pass_pool: Flags that candidates are drawn from.
        generations: Number of generations to run.
        pop_size: Candidates per generation; must be at least 4 so that two
            distinct parents survive selection.
        seq_length: Initial candidate length and the length cap applied by
            crossover and mutation.
        mutation_rate: Probability threshold forwarded to ``mutate``.

    Returns:
        A ``pandas.DataFrame`` with one row per generation holding the best,
        worst and global-best candidates and their fitness values, plus the
        generation's average fitness. The "Time" columns hold whatever
        ``evaluate_candidate`` returns in its default mode.

    Raises:
        ValueError: If ``pop_size`` is below 4.
        subprocess.CalledProcessError: If building or renaming the baseline
            binary fails.
    """
    if pop_size < 4:
        # pop_size // 2 parents are kept and two are sampled per child.
        raise ValueError("pop_size must be at least 4 so that two parents survive selection")

    population = [random_candidate(pass_pool, seq_length) for _ in range(pop_size)]
    stats = []

    # Build the flagless baseline and move it aside before candidate builds run.
    apply_passes(input_bc, [])
    subprocess.run(
        ["mv", f"{input_bc}/{input_bc}", f"{input_bc}/{input_bc}_base"],
        cwd="./beebs/src",
        capture_output=True,
        text=True,
        check=True,
    )

    # Best candidate seen in any generation so far
    global_best_candidate = None
    global_best_fitness = float("inf")

    for generation in range(generations):
        print(f"Generation {generation}")
        fitnesses = [
            (evaluate_candidate(candidate, input_bc, "./scratch"), candidate)
            for candidate in population
        ]

        # Update global best candidate
        for fitness, candidate in fitnesses:
            if fitness < global_best_fitness:
                global_best_fitness = fitness
                global_best_candidate = candidate

        # Selection: the lower-fitness half become parents
        fitnesses.sort(key=lambda x: x[0])
        parents_pool = [cand for _, cand in fitnesses[:(pop_size // 2)]]

        # Crossover and mutation
        new_population = []
        while len(new_population) < pop_size:
            parent_a, parent_b = random.sample(parents_pool, 2)
            child = crossover(parent_a, parent_b, seq_length)
            # Keywords matter here: mutate's third positional parameter is
            # mutation_rate, so the length cap must be passed as max_len.
            child = mutate(child, pass_pool, mutation_rate=mutation_rate, max_len=seq_length)
            new_population.append(child)

        population = new_population

        # Collect stats for the generation just evaluated
        best_fitness, best_candidate = fitnesses[0]
        worst_fitness, worst_candidate = fitnesses[-1]
        avg_fitness = sum(f[0] for f in fitnesses) / len(fitnesses)
        stats.append({
            "Generation": generation,
            "Best Candidate": best_candidate,
            "Global Best Candidate": global_best_candidate,
            "Worst Candidate": worst_candidate,
            "Best Time": best_fitness,
            "Worst Time": worst_fitness,
            "Global Best Time": global_best_fitness,
            "Average Time": avg_fitness
        })
        print(stats[-1])

    # Final best
    print(f"Global Best candidate: {global_best_candidate} with fitness {global_best_fitness}")

    # Create a DataFrame from stats
    stats_df = pd.DataFrame(stats)
    return stats_df


## Example usage

In [8]:
# Flags the GA may combine. The cells below pass this list to run_ga as
# pass_pool; selected entries reach ./compile.sh verbatim via apply_passes.
# NOTE(review): these look like GCC -f optimization options - confirm against
# the compiler that compile.sh invokes.
PASS_POOL = [
    "-fgcse-after-reload",
    "-fipa-cp-clone",
    "-floop-interchange",
    "-floop-unroll-and-jam",
    "-fpeel-loops",
    "-fpredictive-commoning",
    "-fsplit-loops",
    "-fsplit-paths",
    "-ftree-loop-distribution",
    "-ftree-partial-pre",
    "-funswitch-loops",
    "-fvect-cost-model=dynamic",
    "-fversion-loops-for-strides",   
]

In [285]:
# Run the GA on a single benchmark (only sources[0] is used).
# Alternative benchmark kept for quick switching:
# sources = ["aha-mont64"]
sources = ["edn"]
# 20 generations of 30 candidates, each up to 5 flags long.
stats_df = run_ga(sources[0], PASS_POOL, generations=20, pop_size=30, seq_length=5)

Generation 0
{'Generation': 0, 'Best Candidate': ['-ftree-loop-distribution', '-funswitch-loops', '-fpeel-loops', '-fversion-loops-for-strides', '-floop-unroll-and-jam'], 'Global Best Candidate': ['-ftree-loop-distribution', '-funswitch-loops', '-fpeel-loops', '-fversion-loops-for-strides', '-floop-unroll-and-jam'], 'Worst Candidate': ['-ftree-loop-distribution', '-floop-unroll-and-jam', '-fversion-loops-for-strides', '-floop-interchange', '-funswitch-loops'], 'Best Time': -1882485.6, 'Worst Time': 15175023.4, 'Global Best Time': -1882485.6, 'Average Time': 717588.7366666666}
Generation 1
{'Generation': 1, 'Best Candidate': ['-fversion-loops-for-strides', '-funswitch-loops', '-floop-interchange', '-fpeel-loops'], 'Global Best Candidate': ['-fversion-loops-for-strides', '-funswitch-loops', '-floop-interchange', '-fpeel-loops'], 'Worst Candidate': ['-floop-interchange', '-fversion-loops-for-strides', '-floop-unroll-and-jam', '-ftree-loop-distribution'], 'Best Time': -3039199.8, 'Worst Ti

In [286]:
stats_df

Unnamed: 0,Generation,Best Candidate,Global Best Candidate,Worst Candidate,Best Time,Worst Time,Global Best Time,Average Time
0,0,"[-ftree-loop-distribution, -funswitch-loops, -...","[-ftree-loop-distribution, -funswitch-loops, -...","[-ftree-loop-distribution, -floop-unroll-and-j...",-1882485.6,15175023.4,-1882485.6,717588.7
1,1,"[-fversion-loops-for-strides, -funswitch-loops...","[-fversion-loops-for-strides, -funswitch-loops...","[-floop-interchange, -fversion-loops-for-strid...",-3039199.8,1236887.3,-3039199.8,-344298.6
2,2,"[-floop-interchange, -funswitch-loops, -floop-...","[-fversion-loops-for-strides, -funswitch-loops...","[-floop-unroll-and-jam, -ftree-loop-distribution]",-1467113.1,2706162.0,-3039199.8,98014.59
3,3,"[-fversion-loops-for-strides, -floop-interchan...","[-fversion-loops-for-strides, -funswitch-loops...","[-funswitch-loops, -ftree-loop-distribution, -...",-2635471.2,935294.3,-3039199.8,-379311.5
4,4,"[-fversion-loops-for-strides, -fpeel-loops, -f...","[-fversion-loops-for-strides, -funswitch-loops...","[-funswitch-loops, -fpeel-loops]",-1782123.9,815284.9,-3039199.8,-420584.3
5,5,"[-fpeel-loops, -floop-interchange, -floop-unro...","[-fversion-loops-for-strides, -funswitch-loops...","[-ftree-loop-distribution, -fversion-loops-for...",-1835009.3,1233413.8,-3039199.8,-285245.1
6,6,"[-floop-interchange, -fversion-loops-for-strides]","[-fversion-loops-for-strides, -funswitch-loops...","[-floop-unroll-and-jam, -ftree-loop-distributi...",-1348860.9,1996624.1,-3039199.8,-17657.96
7,7,"[-funswitch-loops, -floop-unroll-and-jam]","[-funswitch-loops, -floop-unroll-and-jam]","[-ftree-loop-distribution, -floop-interchange]",-5348313.0,9952575.6,-5348313.0,133985.6
8,8,[-funswitch-loops],"[-funswitch-loops, -floop-unroll-and-jam]","[-fversion-loops-for-strides, -funswitch-loops...",-1625584.0,3157168.3,-5348313.0,-207076.6
9,9,[-fpeel-loops],[-fpeel-loops],[-ftree-loop-distribution],-14895863.0,16920765.5,-14895863.0,-609773.7


In [149]:
# Manual experiment: build dijkstra with a fixed flag list and measure it
# against the existing dijkstra_base binary over 50 iterations.
# apply_passes("edn", [])
input_bc = "dijkstra"
# Flagless build. With the `mv` below commented out it is not renamed to
# *_base here, so measure_runtime uses whatever dijkstra_base already exists.
# NOTE(review): presumably the next apply_passes call overwrites this build - confirm.
apply_passes(input_bc, [])

# subprocess.run(["mv", f"{input_bc}/{input_bc}", f"{input_bc}/{input_bc}_base"], cwd= "./beebs/src", capture_output=True, text=True, check=True)
apply_passes(input_bc, ["-funroll-all-loops", "-finline-functions", "-fipa-cp-clone", "-fpredictive-commoning", "-ftree-loop-distribution", "-ftree-partial-pre", "-funswitch-loops", "-fvect-cost-model=dynamic", "-fsplit-loops", "-fsplit-paths", "-fversion-loops-for-strides"])
# result = apply_passes(input_bc, ["-O2", "-funroll-loops"])
# print(result)
opt_time = measure_runtime(input_bc, 50)
# Values recorded from earlier executions of this cell:
# -75552263.64
# -73332604.12
# -77537008.64
# -75777408.08
# -74279599.84
# -75036094.64

# Disabled: disassemble both binaries for side-by-side inspection.
# dump_cmd = ["riscv64-unknown-elf-objdump", "-D", f"beebs/src/{input_bc}/{input_bc}", ">", f"{input_bc}.dump", "&&", "riscv64-unknown-elf-objdump","-D", f"beebs/src/{input_bc}/{input_bc}_base", ">", f"{input_bc}_base.dump"]
# subprocess.run(" ".join(dump_cmd), shell=True, capture_output=True, text=True, check=True)

# Display the mean cycle delta (negative = fewer cycles than the baseline).
opt_time

-76004866.36

In [None]:
# Persist the GA statistics currently held in stats_df.
# NOTE(review): the file name hard-codes "fibcall" - confirm stats_df came
# from a fibcall run before relying on this file.
stats_df.to_csv("results/fibcall_stats.csv", index=False)

In [256]:
stats_df.iloc[19]["Global Best Candidate"]

['-fvect-cost-model=dynamic']

In [249]:
stats_df.iloc[19]["Global Best Candidate"]

['-fpredictive-commoning', '-finline-functions']

In [251]:
stats_df.iloc[19]["Global Best Candidate"]

['-fpredictive-commoning']

In [192]:
stats_df

Unnamed: 0,Generation,Best Candidate,Worst Candidate,Best Time,Worst Time,Average Time,Global Best Candidate,Global Best Time
0,0,"[-fvect-cost-model=dynamic, -fpredictive-commo...","[-fpeel-loops, -fsplit-loops, -fgcse-after-rel...",15203030.0,17142500.0,15890550.0,"[-fvect-cost-model=dynamic, -fpredictive-commo...",15203030.0
1,1,"[-fversion-loops-for-strides, -floop-unroll-an...","[-fsplit-paths, -fpeel-loops, -fsplit-loops]",15284050.0,16340000.0,15868760.0,"[-fvect-cost-model=dynamic, -fpredictive-commo...",15203030.0
2,2,"[-ftree-loop-distribution, -finline-functions,...","[-fsplit-paths, -funswitch-loops, -fgcse-after...",15455910.0,16150280.0,15904820.0,"[-fvect-cost-model=dynamic, -fpredictive-commo...",15203030.0
3,3,"[-ftree-partial-pre, -fsplit-paths]","[-ftree-loop-distribution, -finline-functions,...",15637200.0,16319690.0,15994750.0,"[-fvect-cost-model=dynamic, -fpredictive-commo...",15203030.0
4,4,"[-ftree-partial-pre, -floop-unroll-and-jam]",[-ftree-partial-pre],15374160.0,17210090.0,16298960.0,"[-fvect-cost-model=dynamic, -fpredictive-commo...",15203030.0
5,5,"[-fpredictive-commoning, -fpeel-loops]","[-ftree-partial-pre, -floop-unroll-and-jam]",15516680.0,16966860.0,15955010.0,"[-fvect-cost-model=dynamic, -fpredictive-commo...",15203030.0
6,6,[-fversion-loops-for-strides],[-ftree-partial-pre],14632240.0,37846310.0,18214180.0,[-fversion-loops-for-strides],14632240.0
7,7,[-ftree-partial-pre],[-ftree-partial-pre],13866640.0,43943740.0,17713520.0,[-ftree-partial-pre],13866640.0
8,8,"[-ftree-partial-pre, -floop-unroll-and-jam]",[-fversion-loops-for-strides],15330580.0,17220070.0,16112870.0,[-ftree-partial-pre],13866640.0
9,9,"[-fipa-cp-clone, -ftree-partial-pre]",[-fversion-loops-for-strides],15150870.0,17079360.0,16071220.0,[-ftree-partial-pre],13866640.0


In [None]:
stats_df.iloc[2]

Generation                                                         2
Best Candidate     [-fversion-loops-for-strides, -ftree-loop-dist...
Worst Candidate                                     [-fipa-cp-clone]
Best Time                                                   0.004494
Worst Time                                                  0.005263
Average Time                                                0.004587
Name: 2, dtype: object

In [None]:
# Benchmarks to tune in one batch; one CSV of GA statistics is written per
# benchmark under results/.
sources = [
    "sglib-listsort",  # 1
    "dtoa",  # 2
    "fac",  # 3

    "trio-snprintf",  # 1
    "fibcall",  # 2
    "crc32",  # 3

    "ctl-vector",  # 1
    "nettle-arcfour",  # 2
    "template",  # 3
]

for source in sources:
    print(f"Running GA for {source}")
    # Tune the benchmark of this iteration, so each CSV holds the statistics
    # of the benchmark it is named after.
    stats_df = run_ga(source, PASS_POOL, generations=30, pop_size=40, seq_length=5)
    stats_df.to_csv(f"results/{source}_stats.csv", index=False)

Running GA for sglib-listsort
Generation 0
{'Generation': 0, 'Best Candidate': ['-ftree-partial-pre', '-floop-interchange', '-fversion-loops-for-strides', '-ftree-loop-distribution', '-fgcse-after-reload'], 'Global Best Candidate': ['-ftree-partial-pre', '-floop-interchange', '-fversion-loops-for-strides', '-ftree-loop-distribution', '-fgcse-after-reload'], 'Worst Candidate': ['-fpredictive-commoning', '-fvect-cost-model=dynamic', '-fsplit-paths', '-fversion-loops-for-strides', '-fipa-cp-clone'], 'Best Time': -7258341.7, 'Worst Time': 8824510.3, 'Global Best Time': -7258341.7, 'Average Time': -501196.6980000005}
Generation 1
{'Generation': 1, 'Best Candidate': ['-fsplit-paths', '-fvect-cost-model=dynamic', '-fgcse-after-reload', '-floop-interchange'], 'Global Best Candidate': ['-fsplit-paths', '-fvect-cost-model=dynamic', '-fgcse-after-reload', '-floop-interchange'], 'Worst Candidate': ['-ftree-partial-pre', '-floop-interchange', '-ftree-loop-distribution', '-fversion-loops-for-strides

KeyboardInterrupt: 