In [1]:
import os
import subprocess

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Basic Block and CFG Classes

In [2]:
class BasicBlock:
    """A single node of a control-flow graph plus its profiling counters.

    Attributes:
        name: Block label or address.
        instructions: List of instructions belonging to the block.
        successors: Set of names of the blocks this block has edges to.
        predecessors: Set of names of the blocks with edges into this block.
        execution_count: Number of times this block was executed.
        execution_time: Time taken to execute this block.
    """

    def __init__(self, name):
        self.name = name
        # Every instance gets its own fresh containers.
        self.instructions = list()
        self.successors, self.predecessors = set(), set()
        # Profiling counters start at zero until profile data is merged in.
        self.execution_count = self.execution_time = 0

class ControlFlowGraph:
    """A directed graph of basic blocks, indexed by block name."""

    def __init__(self):
        self.blocks = {}                 # name -> BasicBlock

    def add_block(self, block):
        """Register ``block`` under ``block.name``, replacing any block with that name."""
        self.blocks[block.name] = block

    def add_edge(self, from_block, to_block):
        """Record a directed edge between two already-registered blocks.

        Args:
            from_block: Name of the source block.
            to_block: Name of the destination block.

        Raises:
            KeyError: If either name has not been registered with ``add_block``.
                The graph is left unmodified in that case.
        """
        # Resolve both endpoints before mutating anything, so an unknown name
        # cannot leave a successor entry without its matching predecessor entry.
        source = self.blocks[from_block]
        target = self.blocks[to_block]
        source.successors.add(to_block)
        target.predecessors.add(from_block)


## Generating CFG Data

In [3]:
# List all binaries in the profiles/embench directory (sorted so the run order
# is the same on every machine; os.listdir returns entries in arbitrary order).
binaries_dir = "profiles/embench"
binaries = sorted(
    f for f in os.listdir(binaries_dir) if os.path.isfile(os.path.join(binaries_dir, f))
)

# Run the CFG generator for each binary.
# The command is passed as an argument list rather than a shell string, so file
# names containing spaces or shell metacharacters reach cfg_gen unchanged.
for binary in binaries:
    binary_path = os.path.join(binaries_dir, binary)
    command = ["./profiles/cfg_gen", binary_path]
    completed = subprocess.run(command, check=False)
    # Surface failures instead of silently discarding the exit status.
    if completed.returncode != 0:
        print(f"Warning: cfg_gen exited with status {completed.returncode} for {binary_path}")

Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.


## Extracting CFG Data from File into the CFG Class

In [28]:
def _parse_dot_edge(line):
    """Return ``(source, target)`` block names for a DOT edge line, else ``None``.

    Quotes are removed from both names, and anything from the first ``;`` or
    ``[`` onward (terminator / attribute list) is dropped from the target.
    """
    if "->" not in line:
        return None
    # Split on the bare arrow so that both `a -> b` and `a->b` are accepted.
    parts = line.strip().split("->")
    source = parts[0].strip().replace("\"", "")
    target = parts[1].split(";")[0].replace("\"", "").split("[")[0].strip()
    if not source or not target:
        return None
    return source, target


def _ensure_block(cfg, name, profile):
    """Add block ``name`` to ``cfg`` if it is missing, seeding it from ``profile``."""
    if name in cfg.blocks:
        return
    block = BasicBlock(name)
    if name in profile:
        count, elapsed = profile[name]
        block.execution_count += count
        block.execution_time += elapsed
    cfg.add_block(block)


def extract_cfg(benchmark, cfg_dir="profiles/cfgs", dataset_dir="profiles/dataset_full"):
    """Build a ControlFlowGraph for one benchmark from its .dot file and profile CSV.

    Edges are read from ``{cfg_dir}/{benchmark}.dot``. Per-block execution
    counts and times are read from ``{dataset_dir}/{benchmark}_dataset.csv``,
    using the columns ``basic_block_address``, ``times_executed`` and ``time``.
    When an address appears in several rows, the first row is used.

    Args:
        benchmark: Benchmark name used to form both file names.
        cfg_dir: Directory containing the ``.dot`` files.
        dataset_dir: Directory containing the ``*_dataset.csv`` files.

    Returns:
        The populated ControlFlowGraph, or ``None`` (after printing an error)
        when the .dot file has no lines or the CSV has no rows.

    Raises:
        FileNotFoundError: If either input file does not exist.
    """
    with open(os.path.join(cfg_dir, f"{benchmark}.dot"), "r") as f:
        lines = f.readlines()

    if len(lines) == 0:
        print(f"Error: No lines found in {benchmark}.dot")
        return None

    # Read addresses as text: block names parsed from the .dot file are strings,
    # and an all-digit address such as "13914" must not be inferred as an integer.
    df = pd.read_csv(
        os.path.join(dataset_dir, f"{benchmark}_dataset.csv"),
        dtype={"basic_block_address": str},
    )
    if df.empty:
        print(f"Error: No data found in {benchmark}_dataset.csv")
        return None

    # Index the profile once (first row per address) instead of scanning the
    # whole frame for every block.
    first_rows = df.drop_duplicates(subset="basic_block_address", keep="first")
    profile = {}
    for address, count, elapsed in zip(
        first_rows["basic_block_address"],
        first_rows["times_executed"],
        first_rows["time"],
    ):
        profile[address] = (count, elapsed)

    # Parse the .dot file to extract basic blocks and edges
    cfg = ControlFlowGraph()
    for line in lines:
        edge = _parse_dot_edge(line)
        if edge is None:
            continue
        from_block, to_block = edge
        # Create blocks if they don't exist, then connect them.
        _ensure_block(cfg, from_block, profile)
        _ensure_block(cfg, to_block, profile)
        cfg.add_edge(from_block, to_block)
    return cfg

In [29]:
benchmark_name = "aha-mont64"  # Replace with the actual benchmark name
cfg = extract_cfg(benchmark_name)
# extract_cfg returns None after printing an error; stop here with a clear
# message rather than failing on `cfg.blocks` below.
if cfg is None:
    raise RuntimeError(f"Could not build a CFG for benchmark {benchmark_name!r}")

# print blocks in CFG
for block_name, block in cfg.blocks.items():
    print(f"Block: {block_name}, Execution Count: {block.execution_count}, Execution Time: {block.execution_time}")
    print(f"Successors: {block.successors}")
    print(f"Predecessors: {block.predecessors}")
    print()


Block: 10c38, Execution Count: 1, Execution Time: 140.0
Successors: {'15cf8'}
Predecessors: {'1392c'}

Block: 15cf8, Execution Count: 0, Execution Time: 0
Successors: {'END'}
Predecessors: {'10c38'}

Block: 13914, Execution Count: 3, Execution Time: 47.333333333333336
Successors: {'138f8', '13924'}
Predecessors: {'158b8'}

Block: 13924, Execution Count: 1, Execution Time: 140.0
Successors: {'1392c'}
Predecessors: {'13914'}

Block: 15900, Execution Count: 1, Execution Time: 139.0
Successors: {'1587c'}
Predecessors: {'14790'}

Block: 1587c, Execution Count: 3, Execution Time: 74.66666666666667
Successors: {'15898'}
Predecessors: {'15870', '15900'}

Block: 13220, Execution Count: 1, Execution Time: 130.0
Successors: {'130d8'}
Predecessors: {'131f8'}

Block: 130d8, Execution Count: 1, Execution Time: 137.0
Successors: {'14790'}
Predecessors: {'13220'}

Block: 1478c, Execution Count: 2, Execution Time: 199.0
Successors: {'13014', '13cc0'}
Predecessors: {'13cbc', '12ff8'}

Block: 13014, Exec

In [34]:
def extract_all_single_entry_exit_sequences(cfg):
    """Collect single-entry/single-exit regions grown from every block of ``cfg``.

    For each block, a region is grown by depth-first traversal: a successor is
    admitted only when all of its predecessors are already inside the region.
    The grown region is kept when exactly one of its blocks has a predecessor
    outside the region and exactly one has a successor outside the region.

    Args:
        cfg: Object whose ``blocks`` maps block names to blocks exposing
            ``name``, ``successors`` and ``predecessors`` (sets of names).

    Returns:
        A list of regions (sets of block names), in ``cfg.blocks`` iteration
        order of their starting block. Regions may overlap or repeat.

    NOTE(review): the admission test looks at the region as grown so far, so a
    successor rejected early is only reconsidered if another edge reaches it
    later — confirm this traversal-order sensitivity is intended.
    """
    sese_regions = []

    for block_name in cfg.blocks:
        region = set()
        stack = [cfg.blocks[block_name].name]

        while stack:
            current = stack.pop()
            if current in region:
                continue
            region.add(current)

            for succ in cfg.blocks[current].successors:
                if succ in region:
                    continue  # already part of the region, nothing to explore
                # Stop if successor has a predecessor outside the region
                if any(pred not in region for pred in cfg.blocks[succ].predecessors):
                    continue  # not a clean single-entry region
                stack.append(succ)

        # Count the blocks that touch the outside of the region.
        entry_count = 0
        exit_count = 0
        for member in region:
            member_block = cfg.blocks[member]
            if any(pred not in region for pred in member_block.predecessors):
                entry_count += 1
            if any(succ not in region for succ in member_block.successors):
                exit_count += 1

        if entry_count == 1 and exit_count == 1:
            sese_regions.append(region)

    return sese_regions


In [35]:
sese_regions = extract_all_single_entry_exit_sequences(cfg)

# Report every single-entry single-exit region, numbered from 1, together with
# the profiling counters of each block it contains.
for region_number, members in enumerate(sese_regions, start=1):
    print(f"Region {region_number}:")
    for member_name in members:
        member = cfg.blocks[member_name]
        print(
            f"  Block: {member_name}, "
            f"Execution Count: {member.execution_count}, "
            f"Execution Time: {member.execution_time}"
        )
    print()

Region 1:
  Block: 138f8, Execution Count: 2, Execution Time: 89.5
  Block: 1392c, Execution Count: 1, Execution Time: 140.0
  Block: 15cf8, Execution Count: 0, Execution Time: 0
  Block: 13924, Execution Count: 1, Execution Time: 140.0
  Block: 13914, Execution Count: 3, Execution Time: 47.333333333333336
  Block: 10c38, Execution Count: 1, Execution Time: 140.0
  Block: END, Execution Count: 0, Execution Time: 0

Region 2:
  Block: 15900, Execution Count: 1, Execution Time: 139.0

Region 3:
  Block: 138f8, Execution Count: 2, Execution Time: 89.5
  Block: 158ac, Execution Count: 3, Execution Time: 59.333333333333336
  Block: 158b0, Execution Count: 3, Execution Time: 56.333333333333336
  Block: 1392c, Execution Count: 1, Execution Time: 140.0
  Block: 1587c, Execution Count: 3, Execution Time: 74.66666666666667
  Block: 13924, Execution Count: 1, Execution Time: 140.0
  Block: 15cf8, Execution Count: 0, Execution Time: 0
  Block: 158b8, Execution Count: 3, Execution Time: 47.33333333