In [157]:
!pip install graphviz

Defaulting to user installation because normal site-packages is not writeable
Collecting graphviz
  Downloading graphviz-0.20.3-py3-none-any.whl (47 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.1/47.1 KB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: graphviz
Successfully installed graphviz-0.20.3


In [158]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from graphviz import Digraph

## Basic Block and CFG Classes

In [159]:
class BasicBlock:
    def __init__(self, name):
        self.name = name                  # Block label or address
        self.instructions = []           # List of instructions
        self.successors = set()          # Edges to next blocks
        self.predecessors = set()        # Incoming edges
        self.execution_count = 0          # Number of times this block was executed
        self.execution_time = 0          # Time taken to execute this block

class ControlFlowGraph:
    def __init__(self):
        self.blocks = {}                 # name -> BasicBlock

    def add_block(self, block):
        self.blocks[block.name] = block

    def add_edge(self, from_block, to_block):
        self.blocks[from_block].successors.add(to_block)
        self.blocks[to_block].predecessors.add(from_block)


## Generating CFG Data

In [3]:
# List all binaries in the profiles/embench directory
binaries_dir = "profiles/embench"
binaries = [f for f in os.listdir(binaries_dir) if os.path.isfile(os.path.join(binaries_dir, f))]

# Run the command for each binary
for binary in binaries:
    binary_path = os.path.join(binaries_dir, binary)
    command = f"./profiles/cfg_gen {binary_path}"
    os.system(command)

Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.
Benchmark completed successfully with no errors.


## Extracting CFG Data from file into CFG class 

In [160]:
def extract_cfg(benchmark):
    lines = []

    with open(f"profiles/cfgs/{benchmark}.dot", "r") as f:
        lines = f.readlines()
    
    if len(lines) == 0:
        print(f"Error: No lines found in {benchmark}.dot")
        return None

    df = pd.read_csv(f"profiles/dataset_full/{benchmark}_dataset.csv")
    if df.empty:
        print(f"Error: No data found in {benchmark}_dataset.csv")
        return None

    # Parse the .dot file to extract basic blocks and edges
    cfg = ControlFlowGraph()
    for line in lines:
        if "->" in line:
            parts = line.strip().split(" -> ")
            from_block = parts[0].strip().replace("\"","")
            # Remove the label from the block name
            to_block = parts[1].strip().split(";")[0].strip().replace("\"","").split("[")[0].strip()

            # Create blocks if they don't exist
            if from_block not in cfg.blocks:
                cfg.add_block(BasicBlock(from_block))
                
                row = df[df["basic_block_address"] == from_block]
                if not row.empty:
                    # Update the execution count and time for the block
                    block = cfg.blocks[from_block]
                    block.execution_count += row["times_executed"].values[0]
                    block.execution_time += row["time"].values[0]

            if to_block not in cfg.blocks:
                cfg.add_block(BasicBlock(to_block))
                # Add edge between blocks   
                row = df[df["basic_block_address"] == to_block]
                if not row.empty:
                    # Update the execution count and time for the block
                    block = cfg.blocks[to_block]
                    block.execution_count += row["times_executed"].values[0]
                    block.execution_time += row["time"].values[0]
            
            cfg.add_edge(from_block, to_block)
            # print row from df where "block" column is from_block
    return cfg

In [161]:
cfg = extract_cfg("aha-mont64")  # Replace with the actual benchmark name
# print blocks in CFG
with open("cfg_blocks_output.txt", "w") as file:
    for block_name, block in cfg.blocks.items():
        file.write(f"Block: {block_name}, Execution Count: {block.execution_count}, Execution Time: {block.execution_time}\n")
        file.write(f"Successors: {block.successors}\n")
        file.write(f"Predecessors: {block.predecessors}\n\n")
        print(f"Block: {block_name}, Execution Count: {block.execution_count}, Execution Time: {block.execution_time}")
        print(f"Successors: {block.successors}")
        print(f"Predecessors: {block.predecessors}")
        print()


Block: 10c38, Execution Count: 1, Execution Time: 140.0
Successors: {'15cf8'}
Predecessors: {'1392c'}

Block: 15cf8, Execution Count: 0, Execution Time: 0
Successors: {'END'}
Predecessors: {'10c38'}

Block: 13914, Execution Count: 3, Execution Time: 47.333333333333336
Successors: {'138f8', '13924'}
Predecessors: {'158b8'}

Block: 13924, Execution Count: 1, Execution Time: 140.0
Successors: {'1392c'}
Predecessors: {'13914'}

Block: 15900, Execution Count: 1, Execution Time: 139.0
Successors: {'1587c'}
Predecessors: {'14790'}

Block: 1587c, Execution Count: 3, Execution Time: 74.66666666666667
Successors: {'15898'}
Predecessors: {'15870', '15900'}

Block: 13220, Execution Count: 1, Execution Time: 130.0
Successors: {'130d8'}
Predecessors: {'131f8'}

Block: 130d8, Execution Count: 1, Execution Time: 137.0
Successors: {'14790'}
Predecessors: {'13220'}

Block: 1478c, Execution Count: 2, Execution Time: 199.0
Successors: {'13014', '13cc0'}
Predecessors: {'13cbc', '12ff8'}

Block: 13014, Exec

## Extracting SESE Regions

### Basic

In [11]:
def extract_all_single_entry_exit_sequences(cfg):
    sese_regions = []

    for block_name in cfg.blocks:
        start = cfg.blocks[block_name]
        region = set()
        stack = [start.name]
        region_entry = {start.name}

        while stack:
            current = stack.pop()
            if current in region:
                continue
            region.add(current)

            block = cfg.blocks[current]
            legal = True

            for succ in block.successors:
                succ_block = cfg.blocks[succ]

                # Stop if successor has a predecessor outside the region
                external_preds = [
                    pred for pred in succ_block.predecessors if pred not in region 
                    and pred not in succ_block.successors
                ]
                if len(external_preds) > 0 and succ not in region:
                    legal = False
            if legal:
                sese_regions.append(region)
                for succ in block.successors:
                    stack.append(succ)

        # Identify entry and exit blocks of the region
        entry_candidates = [
            b for b in region if any(p not in region for p in cfg.blocks[b].predecessors)
        ]
        exit_candidates = [
            b for b in region if any(s not in region for s in cfg.blocks[b].successors)
        ]

        if len(entry_candidates) == 1 and len(exit_candidates) == 1 and len(region) > 1:
        # if len(region) > 1:
            sese_regions.append(region)

    return sese_regions


In [6]:
regions = extract_all_single_entry_exit_sequences(cfg)

with open("sese_regions_output.txt", "w") as file:
    for i, region in enumerate(regions):
        output = f"Region {i + 1}:\n"
        for block_name in region:
            block = cfg.blocks[block_name]
            output += f"  Block: {block_name}, Execution Count: {block.execution_count}, Execution Time: {block.execution_time}\n"
        output += "\n"
        print(output)
        file.write(output)

Region 1:
  Block: 15cf8, Execution Count: 0, Execution Time: 0
  Block: 10c38, Execution Count: 1, Execution Time: 140.0
  Block: END, Execution Count: 0, Execution Time: 0


Region 2:
  Block: 15cf8, Execution Count: 0, Execution Time: 0
  Block: 10c38, Execution Count: 1, Execution Time: 140.0
  Block: END, Execution Count: 0, Execution Time: 0


Region 3:
  Block: 15cf8, Execution Count: 0, Execution Time: 0
  Block: 10c38, Execution Count: 1, Execution Time: 140.0
  Block: END, Execution Count: 0, Execution Time: 0


Region 4:
  Block: 15cf8, Execution Count: 0, Execution Time: 0
  Block: END, Execution Count: 0, Execution Time: 0


Region 5:
  Block: 15cf8, Execution Count: 0, Execution Time: 0
  Block: END, Execution Count: 0, Execution Time: 0


Region 6:
  Block: 138f8, Execution Count: 2, Execution Time: 89.5
  Block: 13914, Execution Count: 3, Execution Time: 47.333333333333336
  Block: 10c38, Execution Count: 1, Execution Time: 140.0
  Block: 15cf8, Execution Count: 0, Exec

### Using Dominator and Post Dominator

In [None]:
def extract_sese_regions(cfg):
    def compute_dominators(cfg, start):
        dom = {b: set(cfg.blocks.keys()) for b in cfg.blocks}
        dom[start] = {start}
        changed = True
        while changed:
            changed = False
            for b in cfg.blocks:
                if b == start:
                    continue
                preds = cfg.blocks[b].predecessors
                if not preds:
                    continue
                new_dom = set(cfg.blocks.keys())
                for p in preds:
                    new_dom &= dom[p]
                new_dom.add(b)
                if dom[b] != new_dom:
                    dom[b] = new_dom
                    changed = True
        return dom

    def compute_postdominators(cfg, end):
        postdom = {b: set(cfg.blocks.keys()) for b in cfg.blocks}
        postdom[end] = {end}
        changed = True
        while changed:
            changed = False
            for b in cfg.blocks:
                if b == end:
                    continue
                succs = cfg.blocks[b].successors
                if not succs:
                    continue
                new_postdom = set(cfg.blocks.keys())
                for s in succs:
                    new_postdom &= postdom[s]
                new_postdom.add(b)
                if postdom[b] != new_postdom:
                    postdom[b] = new_postdom
                    changed = True
        return postdom

    regions = []
    all_blocks = list(cfg.blocks.keys())
    doms = compute_dominators(cfg, all_blocks[0])  # assume entry point is first block
    postdoms_by_exit = {b: compute_postdominators(cfg, b) for b in cfg.blocks}

    for entry in cfg.blocks:
        for exit in cfg.blocks:
            if entry == exit:
                continue

            # Collect region blocks reachable from entry, ending at exit
            region = set()
            stack = [entry]
            while stack:
                node = stack.pop()
                if node in region:
                    continue
                region.add(node)
                for succ in cfg.blocks[node].successors:
                    if succ != exit and succ not in region:
                        stack.append(succ)
            region.add(exit)

            # Validate region: only entry has outside predecessors, only exit has outside successors
            valid = True
            for node in region:
                if node != entry:
                    for pred in cfg.blocks[node].predecessors:
                        if pred not in region:
                            valid = False
                if node != exit:
                    for succ in cfg.blocks[node].successors:
                        if succ not in region:
                            valid = False

            # Check entry dominance and exit postdominance
            if valid:
                if all(entry in doms[b] for b in region if b != entry) and \
                   all(exit in postdoms_by_exit[exit][b] for b in region if b != exit):
                    regions.append((entry, exit, region))

    return regions


### Visualization

In [163]:
def visualize_sese_region(cfg, entry, exit, region, filename='sese_region'):
    dot = Digraph(comment=f'SESE Region from {entry} to {exit}')
    
    # Highlight region blocks
    for name, block in cfg.blocks.items():
        if name in region:
            color = 'lightblue' if name == entry else ('lightgreen' if name == exit else 'white')
            dot.node(name, shape='box', style='filled', fillcolor=color)
        else:
            dot.node(name, shape='box', style='dashed', fillcolor='gray', fontcolor='gray')

    # Add edges
    for name, block in cfg.blocks.items():
        for succ in block.successors:
            if name in region and succ in region:
                dot.edge(name, succ, color='black')
            else:
                dot.edge(name, succ, style='dashed', color='gray')

    dot.render(filename, format='png', cleanup=True)
    print(f"Rendered SESE region to {filename}.png")


In [164]:
regions = extract_sese_regions(cfg)


In [165]:
# Pick one SESE region to visualize
entry, exit, region_blocks = regions[0]

# Visualize it
visualize_sese_region(cfg, entry, exit, region_blocks)

Rendered SESE region to sese_region.png


In [166]:
from graphviz import Digraph
import random

def visualize_cfg_with_sese_regions(cfg, sese_regions, filename='cfg_with_sese'):
    dot = Digraph(comment='Full CFG with SESE regions')
    dot.attr(compound='true', fontsize='10')

    # Assign each SESE region a unique color
    color_palette = ['red', 'blue', 'green', 'orange', 'purple', 'brown', 'pink', 'cyan']
    
    for i, (entry, exit, region) in enumerate(sese_regions):
        color = color_palette[i % len(color_palette)]
        with dot.subgraph(name=f'cluster_{i}') as c:
            c.attr(style='dashed', color=color, label=f'SESE {i}: {entry}->{exit}')
            for node in region:
                fill = 'lightblue' if node == entry else ('lightgreen' if node == exit else 'white')
                c.node(node, style='filled', fillcolor=fill)

    # Add all nodes (for completeness)
    for name in cfg.blocks:
        if not any(name in region for _, _, region in sese_regions):
            dot.node(name, style='dashed', fillcolor='gray', fontcolor='gray')

    # Add edges
    for name, block in cfg.blocks.items():
        for succ in block.successors:
            dot.edge(name, succ)

    # Render the graph
    dot.render(filename, format='png', cleanup=True)
    print(f"Rendered CFG with SESE regions to {filename}.png")


In [21]:
visualize_cfg_with_sese_regions(cfg, regions, filename='cfg_with_sese')

Rendered CFG with SESE regions to cfg_with_sese.png


In [169]:
filtered_regions = [
    (entry, exit, region) for entry, exit, region in regions
    if cfg.blocks[entry].execution_count > 1 and len(region) < 10
]

# Print the filtered regions
with open("filtered_regions_output.txt", "w") as file:
    for i, (entry, exit, region) in enumerate(filtered_regions):
        file.write(f"Region {i + 1}:\n")
        file.write(f"  Entry: {entry}\n")
        file.write(f"  Exit: {exit}\n")
        file.write(f"  Blocks: {sorted(region)}\n\n")
        print(f"Region {i + 1}:")
        print(f"  Entry: {entry}")
        print(f"  Exit: {exit}")
        print(f"  Blocks: {sorted(region)}\n")

Region 1:
  Entry: 1587c
  Exit: 13914
  Blocks: ['12c50', '12c54', '13914', '1587c', '15898', '158ac', '158b0', '158b8']

Region 2:
  Entry: 1587c
  Exit: 12c54
  Blocks: ['12c50', '12c54', '1587c', '15898', '158ac', '158b0']

Region 3:
  Entry: 1587c
  Exit: 158b8
  Blocks: ['12c50', '12c54', '1587c', '15898', '158ac', '158b0', '158b8']

Region 4:
  Entry: 1587c
  Exit: 15898
  Blocks: ['1587c', '15898']

Region 5:
  Entry: 1587c
  Exit: 158ac
  Blocks: ['1587c', '15898', '158ac']

Region 6:
  Entry: 1587c
  Exit: 12c50
  Blocks: ['12c50', '1587c', '15898', '158ac']

Region 7:
  Entry: 1587c
  Exit: 158b0
  Blocks: ['12c50', '1587c', '15898', '158ac', '158b0']

Region 8:
  Entry: 12c54
  Exit: 13914
  Blocks: ['12c54', '13914', '158b8']

Region 9:
  Entry: 12c54
  Exit: 158b8
  Blocks: ['12c54', '158b8']

Region 10:
  Entry: 158b8
  Exit: 13914
  Blocks: ['13914', '158b8']

Region 11:
  Entry: 15898
  Exit: 13914
  Blocks: ['12c50', '12c54', '13914', '15898', '158ac', '158b0', '158b8

In [168]:
with open("regions_output.txt", "w") as file:
    for i, (entry, exit, region) in enumerate(regions):
        file.write(f"Region {i + 1}:\n")
        file.write(f"  Entry: {entry}\n")
        file.write(f"  Exit: {exit}\n")
        file.write(f"  Blocks: {sorted(region)}\n\n")