# Imports

In [18]:

import xmltodict
import re
from tqdm.notebook import tqdm
import subprocess
import json
import os
from time import sleep
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from itertools import product

# XML Parser

In [19]:
# Load the baseline simulator configuration: keep the raw XML text around and
# also expose it as a nested dictionary for quick inspection.
xml_file = 'config_tigerLake.xml'
with open(xml_file, 'r') as config_handle:
    xml_data = config_handle.read()
xml_dict = xmltodict.parse(xml_data)

# Stat File Parser

In [20]:
def extract_stats(stat_file):
    """Parse a simulator statistics file into a flat dictionary of metrics.

    Parameters
    ----------
    stat_file : str or path-like
        Path of the statistics file to read.

    Returns
    -------
    dict
        Scalar metrics converted to ``float`` and keyed by a readable name,
        followed by the energy rows ("Core Energy", "Shared Cache Energy",
        "Main Memory Controller Energy", "Coherence Energy", "Total Energy"),
        each a list of floats in the order they appear in the file.
        "Predicate Predictor Accuracy" is 0 when the file does not report it.

    Raises
    ------
    ValueError
        If a required metric or energy row cannot be found; the message names
        the missing item and the file, so a failed run is easy to identify.
    """
    with open(stat_file, "r") as file:
        stat_text = file.read()

    # (output key, regex with exactly one capture group), in output order.
    required_metrics = [
        ("L1d Hit rate", r"L1\[0\] Hit-Rate\s*=\s*([\d.]+)"),
        ("L1i Hit rate", r"I1\[0\] Hit-Rate\s*=\s*([\d.]+)"),
        ("L2 Hit rate", r"L2\[0\] Hit-Rate\s*=\s*([\d.]+)"),
        ("L3 Hit rate", r"L3\[0\] Hit-Rate\s*=\s*([\d.]+)"),
        ("L1i TLB hit rate", r"iTLB\[0\] Hit-Rate\s*=\s*([\d.]+)"),
        ("L1d TLB hit rate", r"dTLB\[0\] Hit-Rate\s*=\s*([\d.]+)"),
        ("IPC", r"IPC\s*=\s*([\d.]+)\s+in terms of micro-ops"),
        ("Branch Prediction Accuracy", r"branch predictor accuracy\s*=\s*([\d.]+)"),
        # NOTE(review): this pattern also admits ':' and whitespace; float()
        # only succeeds when the captured text is a plain number.
        ("Time Taken", r"Total Execution Time\s*=\s*([\d\s:]+)"),
        ("Micro-op Cache Hit Rate", r"micro-op cache hit rate\s*=\s*([\d.]+)"),
        ("Target Predictor Accuracy", r"target predictor accuracy\s*=\s*([\d.]+)"),
    ]
    predicate_predictor_accuracy_pattern = r"predicate predictor accuracy\s*=\s*([\d.]+)"

    # (output key, label that starts the row in the stat file), in output order.
    energy_rows = [
        ("Core Energy", "coreEnergy.total"),
        ("Shared Cache Energy", "sharedCacheEnergy.total"),
        ("Main Memory Controller Energy", "mainMemoryControllerEnergy.total"),
        ("Coherence Energy", "coherenceEnergy.total"),
    ]

    def extract_metric(pattern, data):
        """Return the first capture group of ``pattern`` in ``data``, or None."""
        match = re.search(pattern, data)
        return match.group(1) if match else None

    def required_float(name, pattern):
        """Extract a mandatory metric, failing with a descriptive error."""
        raw = extract_metric(pattern, stat_text)
        if raw is None:
            raise ValueError(f"Metric '{name}' not found in {stat_file}")
        return float(raw)

    def energy_row(name, label):
        """Floats that follow the first occurrence of ``label`` on its line."""
        pieces = stat_text.split(label)
        if len(pieces) < 2:
            raise ValueError(f"Energy row '{name}' ({label}) not found in {stat_file}")
        return [float(token) for token in pieces[1].split("\n")[0].split()]

    data = {name: required_float(name, pattern) for name, pattern in required_metrics}

    # The predicate predictor line is optional; fall back to 0 when absent.
    predicate_raw = extract_metric(predicate_predictor_accuracy_pattern, stat_text)
    data["Predicate Predictor Accuracy"] = float(predicate_raw) if predicate_raw is not None else 0

    for name, label in energy_rows:
        data[name] = energy_row(name, label)

    # The total is read from everything after the LAST "TotalEnergy" label,
    # so earlier occurrences of that word in the file are skipped.
    total_pieces = stat_text.split("TotalEnergy")
    if len(total_pieces) < 2:
        raise ValueError(f"Energy row 'Total Energy' (TotalEnergy) not found in {stat_file}")
    data["Total Energy"] = [float(token) for token in total_pieces[-1].split()]

    return data

# Try the parser on one stat file; the returned dict is shown as the cell output.
extract_stats('gcc.stat')

{'L1d Hit rate': 0.9544035,
 'L1i Hit rate': 0.79096586,
 'L2 Hit rate': 0.9847691,
 'L3 Hit rate': 0.20170666,
 'L1i TLB hit rate': 0.9988,
 'L1d TLB hit rate': 0.9954,
 'IPC': 1.7945,
 'Branch Prediction Accuracy': 89.7722,
 'Time Taken': 2228.0,
 'Micro-op Cache Hit Rate': 0.8279,
 'Target Predictor Accuracy': 82.4859,
 'Predicate Predictor Accuracy': 97.4274,
 'Core Energy': [390213428.5796, 15769978.6897, 405983407.2693, 0.0],
 'Shared Cache Energy': [25300579.2864, 25024.6114, 25325603.8978, 0.0],
 'Main Memory Controller Energy': [976185.1416, 1213.0112, 977398.1528, 0.0],
 'Coherence Energy': [14602659.9264, 0.0, 14602659.9264, 0.0],
 'Total Energy': [581692812.7244, 16138704.8587, 597831517.5831, 0.0]}

# Experiment Configuration (paths, benchmarks, parameter sweep)

In [21]:
# Location of the Tejas simulator jar. Set the TEJAS_JAR environment variable
# to run the notebook on another machine; the original path is the fallback.
jar_file = os.environ.get(
    "TEJAS_JAR",
    "/home/shashankp/Desktop/GitHub_S/Advanced-CompArch/cs810_resources/Tejas/jars/tejas.jar",
)
# Baseline configuration that every experiment is derived from.
config_path = "./config_tigerLake.xml"
benchmarks = ['gcc', 'lbm', 'mcf', 'namd', 'xalancbmk']
# Parameter sweep: each key is "<XML element>-<child element>" and maps to the
# values tried for that child while everything else keeps its baseline value.
specifications = {
    "BranchPredictor-Predictor_Mode": ["NoPredictor", "PerfectPredictor", "AlwaysTaken", "AlwaysNotTaken", "Tournament", "Bimodal", "GShare", "GAg", "GAp", "PAg", "PAp", "TAGE", "TAGE-SC-L"],
    "MainMemory-MainMemoryLatency": [10, 50, 100, 500],
    "Core-CoreFrequency": [100, 500, 2000, 5000],
    "BranchPredictor-BHRsize": [2, 4, 8, 16, 32, 64],
    "MainMemory-MainMemoryFrequency": [100, 500, 2000, 5000],
    "ITLB-Size": [10, 50, 200, 500],
    "DTLB-Size": [10, 50, 200, 500],
    "IntVectorMul-Latency": [1, 5, 10, 20],
    "FloatMul-Latency": [1, 5, 10],
    "FloatALU-Latency": [1, 5, 10],
    "FMA-Latency": [1, 5, 10],
}
# Start each session with an empty command log (run_tejas appends to it).
with open('command.txt', "w+") as file:
    pass

# Modify Config File

In [22]:
def modify_config_file_subtree(old_file, new_file, key1, key2, value):
    """Copy an XML config, overriding one element nested under another.

    The first element tagged ``key1`` (document order, root included) is
    located, then the first element tagged ``key2`` inside it (``key1`` itself
    included); that element's text is replaced by ``str(value)`` and the whole
    tree is written to ``new_file``.

    Parameters
    ----------
    old_file : str or path-like
        XML file to read.
    new_file : str or path-like
        Destination for the modified XML.
    key1 : str
        Tag of the enclosing element.
    key2 : str
        Tag of the element whose text is replaced.
    value : object
        New value; converted with ``str``.

    Raises
    ------
    KeyError
        If ``key1`` is not in the document or ``key2`` is not inside ``key1``.
        Nothing is written in that case.
    """
    import xml.etree.ElementTree as ET

    tree = ET.parse(old_file)
    root = tree.getroot()

    # Element.iter walks the element itself and its descendants depth-first,
    # so the first hit is the same one a manual recursive search would return.
    parent = next(root.iter(key1), None)
    if parent is None:
        raise KeyError(f"Key doesn't exist: {key1}")

    target = next(parent.iter(key2), None)
    if target is None:
        raise KeyError(f"Key doesn't exist: {key2} (inside {key1})")

    target.text = str(value)
    tree.write(new_file)

# Tejas Runner

In [23]:
def run_tejas(benchmark, jar_file, config_file, stat_file,
              traces_dir="/home/shashankp/Desktop/GitHub_S/Advanced-CompArch/cs810_resources/CPU2017_benchmarks/tejas_traces"):
    """Run the Tejas jar on one benchmark trace and log the command used.

    Parameters
    ----------
    benchmark : str
        Name of the trace directory inside ``traces_dir``.
    jar_file : str
        Path to the simulator jar.
    config_file : str
        Path to the XML configuration; passed to Java as an absolute path.
    stat_file : str
        Path handed to the simulator as its statistics file argument.
    traces_dir : str, optional
        Directory that holds the benchmark traces. Defaults to the location
        the notebook was originally written against.

    Returns
    -------
    subprocess.CompletedProcess
        The finished process, with captured ``stdout``/``stderr`` as text.
    """
    bench_mark_path = os.path.join(traces_dir, benchmark)
    # Build the argument list directly instead of splitting a formatted
    # string, so paths containing whitespace stay single arguments.
    command = [
        'java', '-jar', str(jar_file),
        os.path.abspath(config_file),
        str(stat_file),
        bench_mark_path,
    ]
    with open('command.txt', 'a') as log_file:
        log_file.write(' '.join(command) + "\n")

    completed = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if completed.returncode != 0:
        # Surface simulator failures rather than discarding them silently.
        print(f"Tejas exited with code {completed.returncode} for {benchmark}:\n{completed.stderr.strip()}")
    return completed

# Save Metrics

In [24]:
def save_metrics(data, file_path):
    """Serialise ``data`` as JSON into ``file_path``, replacing any existing file."""
    with open(file_path, 'w') as sink:
        json.dump(data, fp=sink)

# Experiments

In [25]:
# Run every (benchmark, parameter, value) combination once. A run whose stat
# file already exists is skipped, so the cell can be re-executed to resume an
# interrupted sweep.

# Create the output folders up front so a fresh checkout does not fail.
for out_dir in ('./config', './stats', './metrics'):
    os.makedirs(out_dir, exist_ok=True)

total = len(benchmarks) * sum(len(values) for values in specifications.values())
pb = tqdm(total=total)

# full_result[benchmark][key][value] -> metrics dict for that run.
full_result = dict()
for benchmark in benchmarks:
    full_result[benchmark] = {}
    for key in specifications:
        full_result[benchmark][key] = {}
        # Keys look like "<element>-<child>"; split on the first dash only.
        key1, key2 = key.split('-', 1)
        for value in specifications[key]:
            sleep(0.05)
            run_name = f"{benchmark}_{key1}-{key2}_{value}"
            new_config = f"./config/{run_name}.xml"
            new_stat_file = f"./stats/{run_name}.stat"
            metrics_file = f"./metrics/{run_name}.json"
            if os.path.exists(new_stat_file):
                # Already simulated: reuse the saved metrics when available.
                if os.path.exists(metrics_file):
                    with open(metrics_file, 'r') as cached:
                        full_result[benchmark][key][value] = json.load(cached)
                pb.update(1)
                continue
            modify_config_file_subtree(config_path, new_config, key1, key2, value)
            run_tejas(benchmark, jar_file, new_config, new_stat_file)
            extracted_data = extract_stats(new_stat_file)
            save_metrics(extracted_data, metrics_file)
            full_result[benchmark][key][value] = extracted_data
            pb.update(1)

  0%|          | 0/260 [00:00<?, ?it/s]

# Data Extractor

In [None]:
# Collect the per-run metric files into one nested structure,
#   data_struct[suffix][benchmark][str(value)] -> metrics dict (or None),
# and dump it to all_data.json.
base_directory = "./metrics/"

benchmarks = ["gcc", "mcf", "namd", "lbm", "xalancbmk"]

# File-name infix for each swept parameter: "_<element>-<child>_".
suffixes = [
    "_BranchPredictor-BHRsize_",
    "_BranchPredictor-Predictor_Mode_",
    "_Core-CoreFrequency_",
    "_DTLB-Size_",
    "_FloatALU-Latency_",
    "_FloatMul-Latency_",
    "_FMA-Latency_",
    "_IntVectorMul-Latency_",
    "_ITLB-Size_",
    "_MainMemory-MainMemoryFrequency_",
    "_MainMemory-MainMemoryLatency_"
]

# Values to load for each parameter.
# NOTE(review): the experiment sweep also runs BHRsize 32 and 64 and
# IntVectorMul latency 20; they are not listed here, so those runs are left
# out of all_data.json. Confirm whether that omission is intentional.
size_maps = {
    "_BranchPredictor-BHRsize_": [2, 4, 8, 16],
    "_BranchPredictor-Predictor_Mode_": ["AlwaysNotTaken", "AlwaysTaken", "Bimodal", "GAg", "GAp", "GShare", "NoPredictor", "PAg", "PAp", "PerfectPredictor", "TAGE-SC-L", "TAGE", "Tournament"],
    "_Core-CoreFrequency_": [100, 500, 2000, 5000],
    "_DTLB-Size_": [10, 50, 200, 500],
    "_FloatALU-Latency_": [1, 5, 10],
    "_FloatMul-Latency_": [1, 5, 10],
    "_FMA-Latency_": [1, 5, 10],
    "_IntVectorMul-Latency_": [1, 5, 10],
    "_ITLB-Size_": [10, 50, 200, 500],
    "_MainMemory-MainMemoryFrequency_": [100, 500, 2000, 5000],
    "_MainMemory-MainMemoryLatency_": [10, 50, 100, 500],
}

file_names = [bench + suffix for bench, suffix in product(benchmarks, suffixes)]

data_struct = {}

for s in suffixes:
    benchData = {}
    for benchmark_name in benchmarks:
        grp_data = {}
        for x in size_maps[s]:
            metrics_path = base_directory + benchmark_name + s + str(x) + '.json'
            try:
                with open(metrics_path, 'r') as json_file:
                    data_dict = json.load(json_file)
            except (OSError, ValueError):
                # Missing/unreadable file or invalid JSON: record the run as
                # absent. Other errors (e.g. KeyboardInterrupt) propagate.
                data_dict = None
            grp_data[str(x)] = data_dict
        benchData[benchmark_name] = grp_data
    data_struct[s] = benchData

with open('all_data.json', 'w') as json_file:
    json.dump(data_struct, json_file, indent=2)