## Algorithm 1: Naive Paired Cost Analysis

In [37]:
import os
import re
import json
import argparse
from collections import defaultdict

def parse_files(directory):
    """
    Parses all 'ipra_analysis_*.txt' files in a directory. This is done by
    reading all files to build a complete model of the program's call graph
    and register usage.
    """
    callee_save_costs = {}
    callee_call_sites = defaultdict(list)
    function_hotness = {}

    func_pattern = re.compile(r"IPRA: Function: (.*?)\[")
    usage_pattern = re.compile(r"CSRegUsage: (.*?) IsFunctionEntryHot: (\d+)")
    call_pattern = re.compile(r"Calls: (.*?)\[.*\] IsTailCall: (\d+).*? LivingCSRegs: (.*)")
    mbb_pattern = re.compile(r"MBB: \d+.*?MBBCount: (\d+)")

    files_to_process = [os.path.join(directory, f) for f in os.listdir(directory) if f.startswith('ipra_analysis_') and f.endswith('.txt')]
    print(f"Found {len(files_to_process)} profile files to process.")

    for filepath in files_to_process:
        with open(filepath, 'r', errors='ignore') as f:
            current_function = None
            current_mbb_count = 0
            for line in f:
                func_match = func_pattern.search(line)
                if func_match:
                    current_function = func_match.group(1).strip()
                    current_mbb_count = 0

                usage_match = usage_pattern.search(line)
                if usage_match and current_function:
                    regs_str = usage_match.group(1).strip()
                    num_regs = len(regs_str.split()) if regs_str else 0
                    callee_save_costs[current_function] = num_regs
                    is_hot_str = usage_match.group(2).strip()
                    function_hotness[current_function] = (int(is_hot_str) == 1)
                
                mbb_match = mbb_pattern.search(line)
                if mbb_match:
                    current_mbb_count = int(mbb_match.group(1))

                call_match = call_pattern.search(line)
                if call_match and current_function:
                    callee_name = call_match.group(1).strip()
                    live_regs_str = call_match.group(3).strip()
                    num_live_regs = len(live_regs_str.split()) if live_regs_str else 0
                    
                    callee_call_sites[callee_name].append({
                        "caller": current_function,
                        "live_csrs": num_live_regs,
                        "count": current_mbb_count
                    })

    print(f"Found callee-save costs for {len(callee_save_costs)} unique functions.")
    print(f"Found call sites for {len(callee_call_sites)} unique callees.")
    return callee_save_costs, callee_call_sites, function_hotness

In [38]:
def calculate_benefits(callee_save_costs, callee_call_sites, function_hotness):
    """
    Calculates the total adjusted benefit score for each function.
    """
    benefit_scores = defaultdict(int)
    print(f"Calculating benefit scores...")

    for callee, sites in callee_call_sites.items():
        if not function_hotness.get(callee, False):
            continue
        callee_cost = callee_save_costs.get(callee, 0)
        
        total_dynamic_benefit = 0
        sum_of_caller_costs = 0
        
        for site in sites:
            caller_cost = site["live_csrs"]
            exec_count = site["count"]
            total_dynamic_benefit += (callee_cost - caller_cost) * exec_count
            sum_of_caller_costs += caller_cost

        benefit_scores[callee] = total_dynamic_benefit
        
    return benefit_scores

In [39]:
LIVENESS_DATA_DIR = './fdo_liveness_output'
OUTPUT_FILE = './fdo_liveness_output/algo1_liveness_profdata.json'
PRESERVE_NONE_THRESHOLD = 0

costs, sites, function_hotness = parse_files(LIVENESS_DATA_DIR)
scores = calculate_benefits(costs, sites, function_hotness)

# Filter for only positive scores, as negative scores are never beneficial
positive_scores = {func: score for func, score in scores.items()}

# Structure the final JSON and sort by score for easy inspection
output_data = {
    "functions": dict(sorted(positive_scores.items(), key=lambda item: item[1], reverse=True))
}

with open(OUTPUT_FILE, 'w') as f:
    json.dump(output_data, f, indent=2)

print(f"\n✅ Successfully merged profile data into '{OUTPUT_FILE}'")
print(f"Found {len(positive_scores)} functions with a positive benefit score.")

Found 1833 profile files to process.
Found callee-save costs for 93292 unique functions.
Found call sites for 59489 unique callees.
Calculating benefit scores...

✅ Successfully merged profile data into './fdo_liveness_output/algo1_liveness_profdata.json'
Found 3253 functions with a positive benefit score.


## Algorithm 2: Propagating Costs via Bottom-Up Call Graph

In [10]:
import os
import re
import json
import argparse
from collections import defaultdict

def parse_files(directory):
    """
    Parses all 'ipra_analysis_*.txt' files in a directory to build a model
    of the program's call graph and register usage.
    """
    callee_save_costs = {}
    function_hotness = {} # Store hotness for each function
    callee_call_sites = defaultdict(list)
    # The call graph is represented as Caller -> set(Callees)
    successors = defaultdict(set)
    predecessors = defaultdict(set)

    func_pattern = re.compile(r"IPRA: Function: (.*?)\[")
    usage_pattern = re.compile(r"CSRegUsage: (.*?) IsFunctionEntryHot: (\d+)")
    call_pattern = re.compile(r"Calls: (.*?)\[.*\] IsTailCall: (\d+).*? LivingCSRegs: (.*)")
    mbb_pattern = re.compile(r"MBB: \d+.*?MBBCount: (\d+)")

    files_to_process = [os.path.join(directory, f) for f in os.listdir(directory) if f.startswith('ipra_analysis_') and f.endswith('.txt')]
    print(f"Found {len(files_to_process)} profile files to process.")

    all_functions = set()

    for filepath in files_to_process:
        with open(filepath, 'r', errors='ignore') as f:
            current_function = None
            current_mbb_count = 0
            for line in f:
                func_match = func_pattern.search(line)
                if func_match:
                    current_function = func_match.group(1).strip()
                    all_functions.add(current_function)
                    current_mbb_count = 0

                usage_match = usage_pattern.search(line)
                if usage_match and current_function:
                    regs_str = usage_match.group(1).strip()
                    num_regs = len(regs_str.split()) if regs_str else 0
                    callee_save_costs[current_function] = num_regs
                    is_hot_str = usage_match.group(2).strip()
                    function_hotness[current_function] = (int(is_hot_str) == 1)
                
                mbb_match = mbb_pattern.search(line)
                if mbb_match:
                    current_mbb_count = int(mbb_match.group(1))

                call_match = call_pattern.search(line)
                if call_match and current_function:
                    callee_name = call_match.group(1).strip()
                    is_tail_call_str = call_match.group(2).strip()
                    
                    all_functions.add(callee_name)
                    live_regs_str = call_match.group(2).strip()
                    num_live_regs = len(live_regs_str.split()) if live_regs_str else 0
                    
                    callee_call_sites[callee_name].append({
                        "caller": current_function,
                        "live_csrs": num_live_regs,
                        "count": current_mbb_count,
                        "is_tail_call": (int(is_tail_call_str) == 1) # Store tail call info
                    })
                    successors[current_function].add(callee_name)
                    predecessors[callee_name].add(current_function)

    print(f"Found {len(all_functions)} unique functions in the call graph.")
    return callee_save_costs, callee_call_sites, successors, predecessors, all_functions, function_hotness

In [11]:
def topological_sort(nodes, successors, predecessors):
    """
    Performs a topological sort (Kahn's algorithm) to get a bottom-up
    processing order.
    """
    in_degree = {node: len(predecessors[node]) for node in nodes}
    queue = [node for node in nodes if in_degree[node] == 0]
    sorted_nodes = []

    while queue:
        node = queue.pop(0)
        sorted_nodes.append(node)
        for successor in sorted(list(successors[node])): # sort for determinism
            in_degree[successor] -= 1
            if in_degree[successor] == 0:
                queue.append(successor)
    
    # If there's a cycle, not all nodes will be in the sorted list.
    if len(sorted_nodes) != len(nodes):
        print("Warning: Cycle detected in the call graph. Some functions may not be processed.")
        # Add remaining nodes to the end to ensure they are processed.
        remaining_nodes = [n for n in nodes if n not in sorted_nodes]
        sorted_nodes.extend(remaining_nodes)

    return sorted_nodes

In [12]:
def calculate_benefits_bottom_up(callee_save_costs, callee_call_sites, successors, predecessors, all_functions, function_hotness, size_penalty, threshold):
    """
    Calculates benefit scores using a bottom-up traversal of the call graph
    to model the cascading effects of the preserve_none optimization.
    """
    sorted_nodes = topological_sort(all_functions, successors, predecessors)
    print(f"Topologically sorted {len(sorted_nodes)} functions for bottom-up processing.")

    final_candidates = set()
    # This dictionary simulates how a caller's own save cost might increase
    # as its callees become preserve_none.
    effective_cs_usage = defaultdict(int, callee_save_costs)
    final_scores = {}

    for callee in sorted_nodes:
        # Skip any non-hot function:
        if not function_hotness.get(callee, False):
            final_scores[callee] = float('-inf')
            continue

        # 1. Calculate the benefit for the current function using the most up-to-date
        #    cost information for itself and its callees.
        callee_cost = effective_cs_usage[callee]
        total_dynamic_benefit = 0
        sum_of_caller_costs = 0
        
        call_sites = callee_call_sites.get(callee, [])
        for site in call_sites:
            if site.get("is_tail_call", False):
                continue
            caller_cost = site["live_csrs"]
            exec_count = site["count"]
            total_dynamic_benefit += (callee_cost - caller_cost) * exec_count
            sum_of_caller_costs += caller_cost

        total_static_cost = (2 * sum_of_caller_costs) - (2 * callee_cost)
        adjusted_score = total_dynamic_benefit - (size_penalty * total_static_cost)
        final_scores[callee] = adjusted_score

        # 2. Make a decision for the current function.
        if adjusted_score > 0:
            final_candidates.add(callee)
            
            # 3. Propagate the cost of this decision upwards to its callers.
            #    We assume the cost pushed up is the original, static number of
            #    registers the callee was responsible for.
            original_callee_cost = callee_save_costs.get(callee, 0)
            for caller in predecessors[callee]:
                # This simulates the increased register pressure on the caller.
                effective_cs_usage[caller] += original_callee_cost

    return {func: score for func, score in final_scores.items() if func in final_candidates and score > threshold}


In [13]:
LIVENESS_DATA_DIR = './thinly_linked_fdo_liveness_output'
SIZE_PENALTY = 0.1
PRESERVE_NONE_THRESHOLD = 0

costs, sites, successors, predecessors, all_nodes, function_hotness = parse_files(LIVENESS_DATA_DIR)
candidate_scores = calculate_benefits_bottom_up(costs, sites, successors, predecessors, all_nodes, function_hotness, SIZE_PENALTY, PRESERVE_NONE_THRESHOLD)

output_data = dict(sorted(candidate_scores.items(), key=lambda item: item[1], reverse=True))

print(f"Found {len(candidate_scores)} candidate functions meeting the threshold.")

Found 1687 profile files to process.
Found 82906 unique functions in the call graph.
Topologically sorted 82906 functions for bottom-up processing.
Found 1241 candidate functions meeting the threshold.


## Additional Filterting

In [43]:
import os
import re
import json
from collections import defaultdict

LIVENESS_PRERA_DATA_DIR = './fdo_liveness_output'
def filter_dangerous_functions(directory):
    files_to_process = [os.path.join(directory, f) for f in os.listdir(directory) if f.startswith('ipra_prera_analysis_') and f.endswith('.txt')]
    # Regex to capture the main components of a line
    main_pattern = re.compile(r"^IPRA: Function: (.+?)\[(.*?)\]\s*(.*)$")
    # Regex to find all flag names within the flags part of the line
    flag_pattern = re.compile(r"(\w+): \d+")
    parsed_functions = {}
    for filepath in files_to_process:
        with open(filepath, 'r', errors='ignore') as f:
            for line in f:
                main_match = main_pattern.match(line.strip())
                
                if main_match:
                    func_name = main_match.group(1)
                    cu_name = main_match.group(2)
                    flags_string = main_match.group(3)
                    present_flags = flag_pattern.findall(flags_string)
                    
                    parsed_functions[func_name] = present_flags
    return parsed_functions


parsed_functions = filter_dangerous_functions(LIVENESS_PRERA_DATA_DIR)
discarded_functions = set()
discarded_counts = defaultdict(int)
discarded_flags = {'HasAddressTaken', 'MustTailCall', 'IsInterposable', 'UsesAreIndirectCall', 'AllUsesAreNotCall'}
for func in output_data:
    if func in discarded_functions or func not in parsed_functions:
        continue
    flags = set(parsed_functions[func])
    intersection = flags.intersection(discarded_flags)
    if not intersection:
        continue
    for flag in intersection:
        discarded_counts[flag] += 1
    discarded_functions.add(func)
print(f"found {len(discarded_functions)} to discard")
print(discarded_counts)

_ZN4llvm18format_object_base4homeEv
AllUsesAreNotCall: 1 HasAddressTaken: 1
_ZNK4llvm13format_objectIJmmmEE7snprintEPcj
AllUsesAreNotCall: 1 HasAddressTaken: 1
__cxa_pure_virtual
AllUsesAreNotCall: 1 HasAddressTaken: 1
_ZNK4llvm13format_objectIJmEE7snprintEPcj
AllUsesAreNotCall: 1 HasAddressTaken: 1
_ZN4llvm2cl3optIbLb0ENS0_6parserIbEEED2Ev
HasAddressTaken: 1
_ZNK4llvm2cl15OptionValueCopyIbE7compareERKNS0_18GenericOptionValueE
AllUsesAreNotCall: 1 HasAddressTaken: 1
_ZN4llvm2cl18GenericOptionValue6anchorEv
AllUsesAreNotCall: 1 HasAddressTaken: 1
_ZNSt17_Function_handlerIFvRKbEN4llvm2cl3optIbLb0ENS4_6parserIbEEEUlS1_E_EE9_M_invokeERKSt9_Any_dataS1_
AllUsesAreNotCall: 1 HasAddressTaken: 1
_ZNSt17_Function_handlerIFvRKbEN4llvm2cl3optIbLb0ENS4_6parserIbEEEUlS1_E_EE10_M_managerERSt9_Any_dataRKSB_St18_Manager_operation
AllUsesAreNotCall: 1 HasAddressTaken: 1
_GLOBAL__sub_I_ADCE.cpp
AllUsesAreNotCall: 1 HasAddressTaken: 1
_ZN4llvm2cl26TokenizeWindowsCommandLineENS_9StringRefERNS_11StringSaver

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [26]:
before_len = len(output_data)
for func in discarded_functions:
    output_data.pop(func)
after_len = len(output_data)
print(f"Filtered out {before_len - after_len} functions. Now total {len(output_data)} functions left.")

Filtered out 265 functions. Now total 976 functions left.


## Save output

In [27]:
OUTPUT_FILE = f'{LIVENESS_DATA_DIR}/liveness_profdata.json'
output_dict = {"functions": output_data}
with open(OUTPUT_FILE, 'w') as f:
    json.dump(output_dict, f, indent=2)

print(f"\n✅ Successfully merged profile data into '{OUTPUT_FILE}'")


✅ Successfully merged profile data into './thinly_linked_fdo_liveness_output/liveness_profdata.json'
