In [61]:

import os
import re
from pathlib import Path
from typing import Any, Dict
from collections import OrderedDict
import json
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
sns.set_theme(style="whitegrid")
%matplotlib agg

In [2]:
requiredStats = {
    # --- Basic Simulation Metrics ---
    'simSeconds': r'simSeconds\s+([0-9.]+)',
    'cpi': r'board\.processor\.switch\.core\.cpi\s+([0-9.]+)',
    'ipc': r'board\.processor\.switch\.core\.ipc\s+([0-9.]+)',
    'numInstructions': r'simInsts\s+([0-9]+)',
    'numCycles': r'board\.processor\.switch\.core\.numCycles\s+([0-9]+)',

    # --- Branch Prediction ---
    'branchPredicted': r'board\.processor\.switch\.core\.branchPred\.condPredicted\s+([0-9]+)',
    'branchMispredicted': r'board\.processor\.switch\.core\.commit\.branchMispredicts\s+([0-9]+)',

    # --- Cache Miss Rates ---
    'l1iMissRate': r'board\.cache_hierarchy\.l1icaches\.overallMissRate::total\s+([0-9.]+)',
    'l1dMissRate': r'board\.cache_hierarchy\.l1dcaches\.overallMissRate::total\s+([0-9.]+)',
    'l2MissRate': r'board\.cache_hierarchy\.l2cache\.overallMissRate::total\s+([0-9.]+)',

    # --- Cache Accesses ---
    'l1iAccesses': r'board\.cache_hierarchy\.l1icaches\.overallAccesses::total\s+([0-9]+)',
    'l1dAccesses': r'board\.cache_hierarchy\.l1dcaches\.overallAccesses::total\s+([0-9]+)',
    'l2Accesses': r'board\.cache_hierarchy\.l2cache\.overallAccesses::total\s+([0-9]+)',

    # --- Pipeline Stalls & Bubbles (Cycles) ---
    'decodeBlockedCycles': r'board\.processor\.switch\.core\.decode\.blockedCycles\s+([0-9]+)',
    'decodeIdleCycles': r'board\.processor\.switch\.core\.decode\.idleCycles\s+([0-9]+)', # Represents fetch bubbles
    'renameBlockedCycles': r'board\.processor\.switch\.core\.rename\.blockCycles\s+([0-9]+)',
    'iewBlockedCycles': r'board\.processor\.switch\.core\.iew\.blockCycles\s+([0-9]+)',
    'icacheStallCycles': r'board\.processor\.switch\.core\.fetchStats0\.icacheStallCycles\s+([0-9]+)',
    
    # --- Resource Full Events (Stall Causes) ---
    'robFullEvents': r'board\.processor\.switch\.core\.rename\.ROBFullEvents\s+([0-9]+)',
    'iqFullEvents': r'board\.processor\.switch\.core\.rename\.IQFullEvents\s+([0-9]+)',

    # --- Squash Counts ---
    'squashCycles': r'board\.processor\.switch\.core\.fetch\.squashCycles\s+([0-9]+)',
    'commitSquashedInsts': r'board\.processor\.switch\.core\.commit\.commitSquashedInsts\s+([0-9]+)',

    # --- Runtime Breakdown (Execution Cycles) ---
    'fetchRunCycles': r'board\.processor\.switch\.core\.fetch\.cycles\s+([0-9]+)',
    'decodeRunCycles': r'board\.processor\.switch\.core\.decode\.runCycles\s+([0-9]+)',
    'renameRunCycles': r'board\.processor\.switch\.core\.rename\.runCycles\s+([0-9]+)',
}

In [3]:
def collectStats(outputDir):
    data = []
    for root, dirs, files in os.walk(outputDir):
        if 'stats.txt' in files:
            statsFilePath = Path(root) / 'stats.txt'
            try:
                predictor = statsFilePath.parent.name
                benchmarkSize = statsFilePath.parent.parent.name
                benchmark, size = benchmarkSize.rsplit('-', 1)
                
                currentStats: Dict[str, Any] = {
                    'benchmark': benchmark,
                    'size': size,
                    'predictor': predictor
                }

                with open(statsFilePath, 'r') as f:
                    content = f.read()
                    
                if content.strip() == "":
                    print(f"Stats file {statsFilePath} is empty. Skipping.")
                    continue

                for key, pattern in requiredStats.items():
                    match = re.search(pattern, content)
                    if match:
                        currentStats[key] = float(match.group(1))
                    else:
                        currentStats[key] = np.nan
                
                data.append(currentStats)
                print(f"  Successfully parsed: {benchmarkSize}/{predictor}")

            except (ValueError, IndexError) as e:
                print(f"Could not parse path structure for {statsFilePath}. Skipping. Error: {e}")
            except Exception as e:
                print(f"An unexpected error occurred while processing {statsFilePath}: {e}")
    return data

In [4]:
scriptDir = Path.cwd()
outputDir = scriptDir.parent / 'output'
data = collectStats(outputDir)

Stats file d:\Kaushik\IITD\Coursework\COL7418\gem5-branch-predictors\output\blackscholes-large\bimodal\stats.txt is empty. Skipping.
Stats file d:\Kaushik\IITD\Coursework\COL7418\gem5-branch-predictors\output\blackscholes-large\local\stats.txt is empty. Skipping.
  Successfully parsed: blackscholes-large/ltage
  Successfully parsed: blackscholes-large/perceptron
  Successfully parsed: blackscholes-large/tage
  Successfully parsed: blackscholes-large/tournament
  Successfully parsed: blackscholes-medium/bimodal
  Successfully parsed: blackscholes-medium/local
  Successfully parsed: blackscholes-medium/ltage
  Successfully parsed: blackscholes-medium/perceptron
Stats file d:\Kaushik\IITD\Coursework\COL7418\gem5-branch-predictors\output\blackscholes-medium\tage\stats.txt is empty. Skipping.
  Successfully parsed: blackscholes-small/bimodal
  Successfully parsed: blackscholes-small/local
  Successfully parsed: blackscholes-small/ltage
  Successfully parsed: blackscholes-small/perceptron
  

In [10]:
df = pd.DataFrame(data)
if not df.empty:
    cols = ['benchmark', 'size', 'predictor'] + sorted([col for col in df.columns if col not in ['benchmark', 'size', 'predictor']])
    df = df[cols]

In [11]:
df.columns

Index(['benchmark', 'size', 'predictor', 'branchMispredicted',
       'branchPredicted', 'commitSquashedInsts', 'cpi', 'decodeBlockedCycles',
       'decodeIdleCycles', 'decodeRunCycles', 'fetchRunCycles',
       'icacheStallCycles', 'iewBlockedCycles', 'ipc', 'iqFullEvents',
       'l1dAccesses', 'l1dMissRate', 'l1iAccesses', 'l1iMissRate',
       'l2Accesses', 'l2MissRate', 'numCycles', 'numInstructions',
       'renameBlockedCycles', 'renameRunCycles', 'robFullEvents', 'simSeconds',
       'squashCycles'],
      dtype='object')

In [12]:
# Calculate CPI and IPC
df['cpi'] = df['numCycles'] / df['numInstructions']
df['ipc'] = df['numInstructions'] / df['numCycles']

# Calculate Branch Misprediction Rate
df['mispredictRate'] = df['branchMispredicted'] / df['branchPredicted']

# Calculate Misses Per Kilo-Instruction (MPKI)
l1i_misses = df['l1iMissRate'] * df['l1iAccesses']
l1d_misses = df['l1dMissRate'] * df['l1dAccesses']
l2_misses = df['l2MissRate'] * df['l2Accesses']

df['l1i_MPKI'] = l1i_misses * 1000 / df['numInstructions']
df['l1d_MPKI'] = l1d_misses * 1000 / df['numInstructions']
df['l2_MPKI'] = l2_misses * 1000 / df['numInstructions']
df['branch_MPKI'] = df['branchMispredicted'] * 1000 / df['numInstructions']

In [14]:
columnOrder = [
    # 1. Experiment metadata
    "benchmark", "size", "predictor",

    # 2. Instruction/branch-level stats
    "numInstructions", "branchPredicted", "branchMispredicted",
    "mispredictRate", "branch_MPKI",

    # 3. Cache & memory stats
    "l1iAccesses", "l1iMissRate", "l1i_MPKI",
    "l1dAccesses", "l1dMissRate", "l1d_MPKI",
    "l2Accesses", "l2MissRate", "l2_MPKI",

    # 4. Cycle & stall breakdown
    "numCycles", "decodeRunCycles", "decodeBlockedCycles", "decodeIdleCycles",
    "fetchRunCycles", "icacheStallCycles", "iewBlockedCycles",
    "renameRunCycles", "renameBlockedCycles", "squashCycles",
    "commitSquashedInsts", "iqFullEvents", "robFullEvents",

    # 5. Performance metrics
    "ipc", "cpi", "simSeconds"
]
existingColumns = [col for col in columnOrder if col in df.columns]
df = df[existingColumns]

In [15]:
df

Unnamed: 0,benchmark,size,predictor,numInstructions,branchPredicted,branchMispredicted,mispredictRate,branch_MPKI,l1iAccesses,l1iMissRate,...,iewBlockedCycles,renameRunCycles,renameBlockedCycles,squashCycles,commitSquashedInsts,iqFullEvents,robFullEvents,ipc,cpi,simSeconds
0,blackscholes,large,ltage,7576981000.0,82544187.0,2248755.0,0.027243,0.296788,273404630.0,0.000323,...,45561150.0,568975030.0,51673000.0,4557768.0,198502200.0,69944406.0,96911.0,9.346253,0.106995,0.253749
1,blackscholes,large,perceptron,7654323000.0,82552520.0,2249764.0,0.027253,0.293921,273409609.0,0.000326,...,45719940.0,568968665.0,51848700.0,4561870.0,198601500.0,69960769.0,101322.0,9.437217,0.105963,0.253869
2,blackscholes,large,tage,7550722000.0,82551417.0,2248731.0,0.02724,0.297817,273408655.0,0.000324,...,45814360.0,568985063.0,51938240.0,4557544.0,198571000.0,69966162.0,99373.0,9.310425,0.107406,0.253843
3,blackscholes,large,tournament,7588853000.0,82573994.0,2250450.0,0.027254,0.296547,273423318.0,0.000338,...,45796870.0,569003155.0,51968040.0,4562802.0,198615200.0,69953617.0,99673.0,9.352467,0.106924,0.253977
4,blackscholes,medium,bimodal,5498957000.0,21545337.0,586143.0,0.027205,0.106592,69218536.0,0.00088,...,12263280.0,143732062.0,13744570.0,1206316.0,50528370.0,17614214.0,28575.0,26.470155,0.037778,0.065023
5,blackscholes,medium,local,8459746000.0,21544975.0,586272.0,0.027212,0.069301,69188767.0,0.000909,...,12020550.0,143724276.0,13506600.0,1205118.0,50416910.0,17611572.0,27564.0,40.932792,0.02443,0.06572
6,blackscholes,medium,ltage,5481597000.0,21551539.0,583893.0,0.027093,0.106519,69211759.0,0.000866,...,12173820.0,143739950.0,13636500.0,1200934.0,50394190.0,17606340.0,25618.0,26.416317,0.037855,0.064952
7,blackscholes,medium,perceptron,8711549000.0,21552406.0,586920.0,0.027232,0.067373,69200606.0,0.00085,...,11966790.0,143747407.0,13440830.0,1203820.0,50440920.0,17617250.0,28106.0,42.164878,0.023716,0.065672
8,blackscholes,small,bimodal,4935469000.0,6359076.0,171198.0,0.026922,0.034687,18240425.0,0.002586,...,2481094.0,37621853.0,2755436.0,365420.0,13730720.0,4397506.0,8085.0,89.247299,0.011205,0.01731
9,blackscholes,small,local,4958329000.0,6349020.0,171141.0,0.026955,0.034516,18231413.0,0.002602,...,2461394.0,37605868.0,2728444.0,365080.0,13727180.0,4396985.0,7795.0,89.775449,0.011139,0.017287


In [16]:
csvPath = scriptDir / 'simResult.csv'
df.to_csv(csvPath, index=False)

In [17]:
jsonPath = scriptDir / 'simResult.json'
result = {}
for _, row in df.iterrows():
    benchmark = row['benchmark']
    size = row['size']
    predictor = row['predictor']
    
    # all the other stats in a dict
    datapoints = row.drop(['benchmark', 'size', 'predictor']).to_dict()
    
    result.setdefault(benchmark, {}) \
          .setdefault(size, {}) \
          .setdefault(predictor, datapoints)

with open(jsonPath, "w") as f:
    json.dump(result, f, indent=4)

In [21]:
plotDir = "figs"
os.makedirs(plotDir, exist_ok=True)

sns.set_theme(style="whitegrid", context="talk")

In [19]:
predOrder = (
    df.groupby("predictor")["ipc"]
      .mean()
      .sort_values(ascending=False)
      .index
      .tolist()
)
df["predictor"] = pd.Categorical(df["predictor"], categories=predOrder, ordered=True)

In [87]:
for bench in df["benchmark"].unique():
    plt.figure(figsize=(12, 7))
    ax = sns.barplot(data=df[df["benchmark"] == bench],
                     x="predictor", y="ipc", hue="size",
                     order=predOrder, palette="muted", edgecolor="black")
    for p in ax.patches:
        h = p.get_height()
        if pd.notna(h):
            ax.text(p.get_x() + p.get_width() / 2., h + 0.02,
                    f"{h:.2f}", ha="center", va="bottom", fontsize=9)
    plt.xticks(rotation=30, ha="right", fontsize=11)
    plt.yticks(fontsize=11)
    plt.xlabel("Predictor", fontsize=13)
    plt.ylabel("IPC", fontsize=13)
    plt.title(f"IPC by Predictor for {bench} (hue = size)",
              fontsize=15, weight="bold", pad=15)
    plt.grid(axis="y", linestyle="--", alpha=0.6)
    plt.tight_layout()
    plt.savefig(os.path.join(plotDir,
               f"ipc_by_predictor_{bench}.png"), dpi=300)
    plt.close()

In [88]:
avgIPC = (df.groupby("predictor", as_index=False)["ipc"]
            .mean()
            .sort_values("ipc", ascending=False))
plt.figure(figsize=(12, 7))
sns.barplot(data=avgIPC, x="predictor", y="ipc",
            order=predOrder, hue="predictor",
            edgecolor="black")
for i, row in avgIPC.iterrows():
    plt.text(i, row["ipc"] + 0.02, f"{row['ipc']:.2f}",
             ha="center", va="bottom", fontsize=10)
plt.title("Overall Mean IPC by Predictor",
          fontsize=16, weight="bold", pad=15)
plt.xlabel("Predictor", fontsize=13)
plt.ylabel("Mean IPC", fontsize=13)
plt.xticks(rotation=30, ha="right", fontsize=11)
plt.yticks(fontsize=11)
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.tight_layout()
plt.savefig(os.path.join(plotDir,
           "overall_mean_ipc_by_predictor.png"), dpi=300)
plt.close()

  avgIPC = (df.groupby("predictor", as_index=False)["ipc"]


In [89]:
cols = ['ipc', 'mispredictRate', 'branch_MPKI',
        'l1iMissRate', 'l1i_MPKI',
        'l1dMissRate', 'l1d_MPKI',
        'l2MissRate', 'l2_MPKI']
corr = df[cols].corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr, annot=False, fmt=".2f",
            cmap="coolwarm", center=0,
            square=True, linewidths=0.5,
            linecolor="gray",
            cbar_kws={"shrink": 0.7, "aspect": 30})
plt.title("Correlation Heatmap (IPC vs Branching & Memory Metrics)",
          fontsize=16, weight="bold", pad=15)
plt.xticks(rotation=90, ha="right", fontsize=10)
plt.yticks(rotation=0, fontsize=10)
plt.tight_layout()
plt.savefig(os.path.join(plotDir, "correlation_heatmap.png"), dpi=300)
plt.close()

In [90]:
complexity = ["bimodal", "local", "tournament",
              "tage", "ltage", "perceptron"]
mispAvg = (df.groupby(["predictor", "complexity"],
                      as_index=False)["mispredictRate"]
             .mean()
             .sort_values("complexity"))
plt.figure(figsize=(12, 7))
sns.lineplot(data=mispAvg, x="predictor",
             y="mispredictRate", marker="o",
             sort=False, linewidth=2, markersize=8,
             palette="muted")
plt.xticks(ticks=range(len(complexity)),
           labels=complexity, rotation=30, fontsize=12)
plt.yticks(fontsize=12)
plt.title("Misprediction Rate vs Predictor Complexity",
          fontsize=16, weight="bold")
plt.xlabel("Predictor (ordered by complexity)",
           fontsize=14)
plt.ylabel("Mean Misprediction Rate", fontsize=14)
plt.tight_layout()
plt.savefig(os.path.join(plotDir,
           "mispredict_vs_complexity.png"), dpi=200)
plt.close()

  mispAvg = (df.groupby(["predictor", "complexity"],
  sns.lineplot(data=mispAvg, x="predictor",


In [91]:
fig, ax = plt.subplots(figsize=(12, 7))
sns.scatterplot(data=df, x="mispredictRate", y="ipc",
                hue="predictor", style="benchmark",
                s=100, alpha=0.8, ax=ax, palette="muted")
sns.regplot(data=df, x="mispredictRate", y="ipc",
            scatter=False, ax=ax, color="black",
            line_kws={"lw": 2, "alpha": 0.7})
ax.set_title("IPC vs Misprediction Rate",
             fontsize=14, weight="bold")
ax.set_xlabel("Misprediction Rate", fontsize=12)
ax.set_ylabel("IPC", fontsize=12)
ax.tick_params(axis='both', labelsize=12)
leg = ax.get_legend()
if leg:
    leg.remove()
handles, labels = ax.get_legend_handles_labels()
by_label = OrderedDict(zip(labels, handles))
fig.legend(by_label.values(), by_label.keys(),
           loc="upper center", ncol=4,
           frameon=True, fontsize=12)
plt.tight_layout(rect=[0, 0, 1, 0.9])
plt.savefig(os.path.join(plotDir,
           "ipc_vs_mispredict_scatter.png"), dpi=200)
plt.close()

In [92]:
pairVars = ["ipc", "robFullEvents", "iqFullEvents",
            "commitSquashedInsts", "branch_MPKI"]
pp = sns.pairplot(df, vars=pairVars, hue="predictor",
                  corner=True,
                  plot_kws={'alpha': 0.6, 's': 50,
                            'edgecolor': 'k'},
                  diag_kind='kde', palette="muted")
pp.fig.set_size_inches(14, 12)
plt.tight_layout()
plt.savefig(os.path.join(plotDir,
           "rob_ipc_improved.png"), dpi=200)
plt.close()

In [97]:
bimodal_ipc = df[df["predictor"] == "bimodal"].groupby("benchmark")["ipc"].mean().rename("bimodal_ipc")
speedup_df = df.groupby(["predictor", "benchmark"], as_index=False)["ipc"].mean().merge(bimodal_ipc, on="benchmark")
speedup_df["speedup_vs_bimodal"] = speedup_df["ipc"] / speedup_df["bimodal_ipc"]

plt.figure(figsize=(14, 8))
sns.barplot(data=speedup_df, x="predictor", y="speedup_vs_bimodal", hue="benchmark",
            order=predOrder, palette="muted", edgecolor="black")
plt.title("Speedup of Predictors Relative to Bimodal (by Benchmark)", fontsize=16, weight="bold", pad=15)
plt.xlabel("Predictor", fontsize=13)
plt.ylabel("Speedup (IPC / IPC_bimodal)", fontsize=13)
plt.xticks(rotation=30, ha="right", fontsize=11)
plt.yticks(fontsize=11)
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.tight_layout()
plt.savefig(os.path.join(plotDir, "speedup_vs_bimodal_by_benchmark.png"), dpi=300)
plt.close()

  speedup_df = df.groupby(["predictor", "benchmark"], as_index=False)["ipc"].mean().merge(bimodal_ipc, on="benchmark")
