In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from graphviz import Digraph
import json
import re

In [49]:
# Per-instruction dataset covering all Embench binaries.
# The address columns hold hex strings (e.g. "1587c") and are matched against
# string block names further down. Force them to str so that pandas' type
# inference can never turn decimal-looking addresses into numbers, which
# would make the string comparisons silently miss those rows.
inst_df = pd.read_csv(
    'dataset_bbs_embench.csv',
    dtype={"basic_block_address": str, "address": str},
)

In [7]:
class BasicBlock:
    """One node of a control-flow graph together with its profiling counters."""

    def __init__(self, name):
        # Identity of the block: its label or address.
        self.name = name
        # Instructions belonging to the block (starts empty).
        self.instructions = list()
        # Names of the blocks this block jumps to / is reached from.
        self.successors, self.predecessors = set(), set()
        # Profiling data: how often the block ran and how long it took.
        self.execution_count = self.execution_time = 0

class ControlFlowGraph:
    """Directed graph of basic blocks, indexed by block name."""

    def __init__(self):
        # Maps block name -> block object.
        self.blocks = dict()

    def add_block(self, block):
        """Register ``block`` under ``block.name``, replacing any previous entry."""
        key = block.name
        self.blocks[key] = block

    def add_edge(self, from_block, to_block):
        """Record the directed edge ``from_block -> to_block`` (both given by name).

        Both names must already be registered; a missing name raises KeyError.
        """
        source = self.blocks[from_block]
        source.successors.add(to_block)
        target = self.blocks[to_block]
        target.predecessors.add(from_block)


In [40]:
def extract_cfg(benchmark):
    """Build the control-flow graph of ``benchmark`` from its .dot file and profile CSV.

    Edges are read from ``profiles/cfgs/{benchmark}.dot``: every line that
    contains ``->`` is treated as one edge, using the text before the arrow as
    the source name and the text after it (up to the first ``;`` or ``[``) as
    the target name, with double quotes removed. Each block is created the
    first time its name appears and is seeded with ``times_executed`` and
    ``time`` from the first row of
    ``profiles/dataset_full/{benchmark}_dataset.csv`` whose
    ``basic_block_address`` equals the block name; blocks without such a row
    keep zero counters.

    Args:
        benchmark: Name used to locate both input files.

    Returns:
        The populated ControlFlowGraph, or None (after printing an error) when
        the .dot file contains no lines or the CSV contains no rows.

    Raises:
        OSError: If either input file cannot be opened (e.g. FileNotFoundError).
        KeyError: If the CSV lacks one of the columns read here.
    """
    with open(f"profiles/cfgs/{benchmark}.dot", "r") as f:
        lines = f.readlines()

    if not lines:
        print(f"Error: No lines found in {benchmark}.dot")
        return None

    # Block names parsed from the .dot file are strings, so keep the address
    # column as strings too; otherwise a column of decimal-looking addresses
    # would be inferred as numeric and never match.
    df = pd.read_csv(
        f"profiles/dataset_full/{benchmark}_dataset.csv",
        dtype={"basic_block_address": str},
    )
    if df.empty:
        print(f"Error: No data found in {benchmark}_dataset.csv")
        return None

    # Index the profile once (first row wins for duplicated addresses) instead
    # of rescanning the whole frame for every new block.
    block_stats = {}
    for address, count, elapsed in zip(
        df["basic_block_address"].values,
        df["times_executed"].values,
        df["time"].values,
    ):
        block_stats.setdefault(address, (count, elapsed))

    cfg = ControlFlowGraph()

    def ensure_block(name):
        # Create the block on first sight and attach its profile data, if any.
        if name in cfg.blocks:
            return
        block = BasicBlock(name)
        if name in block_stats:
            count, elapsed = block_stats[name]
            block.execution_count += count
            block.execution_time += elapsed
        cfg.add_block(block)

    # Parse the .dot file to extract basic blocks and edges.
    for line in lines:
        if "->" not in line:
            continue

        stripped = line.strip()
        parts = stripped.split(" -> ")
        if len(parts) < 2:
            # Arrow written without surrounding spaces (e.g. `a->b`).
            parts = stripped.split("->")

        from_block = parts[0].strip().replace("\"", "")
        # Drop the statement terminator and any attribute list after the target.
        to_block = parts[1].strip().split(";")[0].strip().replace("\"", "").split("[")[0].strip()

        if not from_block or not to_block:
            # Not a usable edge statement; skip rather than create a nameless block.
            continue

        ensure_block(from_block)
        ensure_block(to_block)
        cfg.add_edge(from_block, to_block)

    return cfg

In [41]:
# Build the control-flow graph for the aha-mont64 benchmark.
cfg = extract_cfg(benchmark="aha-mont64")

In [43]:
def get_ordered_blocks(cfg, entry, exit, blocks):
    """Return a region's blocks in depth-first pre-order, starting at ``entry``.

    The walk starts at ``entry`` and only follows successors that are listed
    in ``blocks``; names that are not present in ``cfg.blocks`` are skipped.
    Successors are visited in sorted order so the result is the same on every
    run (set iteration order for strings varies between Python processes).
    The traversal uses an explicit stack, so long chains of blocks cannot hit
    the interpreter's recursion limit.

    Args:
        cfg: Graph object exposing ``blocks``, a mapping from block name to an
            object with a ``successors`` collection of block names.
        entry: Name of the block where the walk starts.
        exit: Name of the region's exit block.
        blocks: Names of the blocks that belong to the region.

    Returns:
        List of visited block names in visiting order. If ``exit`` belongs to
        ``blocks`` but was not reached, it is appended at the end.
    """
    region = set(blocks)  # constant-time membership tests
    visited = set()
    ordered_blocks = []

    stack = [entry]
    while stack:
        block = stack.pop()
        if block in visited or block not in cfg.blocks:
            continue
        visited.add(block)
        ordered_blocks.append(block)
        successors = sorted(s for s in cfg.blocks[block].successors if s in region)
        # Push in reverse so the smallest successor is popped (visited) first,
        # which reproduces the pre-order of a recursive DFS.
        stack.extend(reversed(successors))

    if exit not in visited and exit in region:
        ordered_blocks.append(exit)
    return ordered_blocks

In [50]:
# Load the filtered regions recorded for aha-mont64.
with open("./profiles/regions/aha-mont64_filtered_regions.json", "r") as file:
    filtered_regions = json.load(file)

# Unpack the first region: its entry block, exit block and member blocks.
entry, exit, blocks = (
    filtered_regions[0][field] for field in ('entry', 'exit', 'blocks')
)

# Order the region's blocks by walking the CFG from the entry block.
ordered_blocks = get_ordered_blocks(cfg, entry, exit, blocks)
print(ordered_blocks)

['1587c', '15898', '158ac', '12c50', '158b0', '12c54', '158b8', '13914']


In [45]:
# Inspect the first filtered region (its 'entry', 'exit' and 'blocks' fields).
filtered_regions[0]

{'entry': '1587c',
 'exit': '13914',
 'blocks': ['12c50',
  '158b0',
  '158b8',
  '15898',
  '1587c',
  '158ac',
  '12c54',
  '13914']}

In [46]:
# Order the blocks of the first region, reading its fields straight from the JSON data.
ordered_blocks = get_ordered_blocks(
    cfg,
    filtered_regions[0]['entry'],
    filtered_regions[0]['exit'],
    filtered_regions[0]['blocks'],
)
ordered_blocks

['1587c', '15898', '158ac', '12c50', '158b0', '12c54', '158b8', '13914']

In [51]:
ordered_blocks = get_ordered_blocks(cfg, filtered_regions[0]['entry'], filtered_regions[0]['exit'], filtered_regions[0]['blocks'])

# Collect one record per instruction of every ordered block, then build the
# frame in a single step. Growing a DataFrame with pd.concat inside the loop
# copies the whole frame on every iteration.
records = []
for block in ordered_blocks:
    rows = inst_df[(inst_df["basic_block_address"] == block) & (inst_df["binary"] == "aha-mont64")]
    for address, instruction, binary in zip(rows["address"], rows["instruction"], rows["binary"]):
        records.append({
            # Every row is tagged with the first and last block of the ordering.
            "start_basic_block": ordered_blocks[0],
            "end_basic_block": ordered_blocks[-1],
            "address": address,
            "instruction": instruction,
            "binary": binary,
        })

# Passing the column list keeps the schema even when no instruction matched.
df = pd.DataFrame(
    records,
    columns=["start_basic_block", "end_basic_block", "address", "instruction", "binary"],
)

In [52]:
# Display the per-instruction rows collected for the first aha-mont64 region.
df

Unnamed: 0,start_basic_block,end_basic_block,address,instruction,binary
0,1587c,13914,1587c,"LD a1, 88(s0)",aha-mont64
1,1587c,13914,15880,"BEQ a1, zero, 24",aha-mont64
2,1587c,13914,15898,"LD a1, 120(s0)",aha-mont64
3,1587c,13914,1589c,"BEQ a1, zero, 16",aha-mont64
4,1587c,13914,158ac,"JAL ra, -11356",aha-mont64
5,1587c,13914,12c50,"JALR, zero, ra, 0",aha-mont64
6,1587c,13914,158b0,"SH zero, 16(s0)",aha-mont64
7,1587c,13914,158b4,"JAL ra, -11360",aha-mont64
8,1587c,13914,12c54,"JALR, zero, ra, 0",aha-mont64
9,1587c,13914,158b8,"LD ra, 24(sp)",aha-mont64


In [59]:
# Build the per-instruction region dataset for every Embench binary and save it.
#
# Records are accumulated in a list and turned into a DataFrame once at the
# end; growing the frame with pd.concat inside the loops copies it on every
# iteration.
records = []

# os.listdir returns entries in arbitrary order; sort them so the rows of the
# written CSV come out in the same order on every run. "nsichneu" is excluded.
bins = sorted(name for name in os.listdir("./profiles/embench") if name != "nsichneu")

# The loop variable is deliberately not called `bin`, which would shadow the
# builtin for the rest of the kernel session.
for binary_name in bins:
    with open(f"./profiles/regions/{binary_name}_filtered_regions.json", "r") as file:
        filtered_regions = json.load(file)

    cfg = extract_cfg(binary_name)
    if cfg is None:
        print(f"Error: No CFG found for {binary_name}")
        continue

    # Restrict the instruction table to this binary once, not once per block.
    binary_insts = inst_df[inst_df["binary"] == binary_name]

    for region in filtered_regions:
        ordered_blocks = get_ordered_blocks(cfg, region['entry'], region['exit'], region['blocks'])
        if not ordered_blocks:
            print(f"Error: No ordered blocks found for {binary_name}")
            continue

        for block in ordered_blocks:
            rows = binary_insts[binary_insts["basic_block_address"] == block]
            for address, instruction in zip(rows["address"], rows["instruction"]):
                records.append({
                    "start_basic_block": region['entry'],
                    "end_basic_block": region['exit'],
                    "address": address,
                    "instruction": instruction,
                    "binary": binary_name,
                })

# Passing the column list keeps the CSV header even when nothing was collected.
df = pd.DataFrame(
    records,
    columns=["start_basic_block", "end_basic_block", "address", "instruction", "binary"],
)
df.to_csv("dataset_regions_embench.csv", index=False)
    