In [1]:
import pandas as pd
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

import numpy as np
import os
from collections import Counter
from matplotlib import pyplot as plt
from typing import List
import ray

In [2]:
# Average misprediction rate per predictor configuration: every CSV found in
# ./<config>/ contributes its own rate with equal weight (1 / number of files).
cs = ['tagescl']
data = {}
for c in cs:
    data[c] = 0
    files = [f for f in os.listdir(f'./{c}/') if f.endswith('.csv')]
    n_files = len(files)
    for file in files:
        df = pd.read_csv(f'./{c}/{file}')
        # Rows where the recorded outcome and the prediction disagree
        mispredicted = df['actual'] != df['predicted']
        misp_count = mispredicted.sum()
        total_count = len(df)
        data[c] += (misp_count / total_count) / n_files
print(data)
# Results recorded from earlier runs:
# {'tagescl': 0.03466319384656657, 'tagescl64': 0.02967903033877432, 'tagescl179': 0.028187454722456498, 'tagescl192': 0}

{'tagescl': 0.02967903033877432}


In [2]:
def misp_rate(df:pd.DataFrame):
    """Return the fraction of rows whose `actual` value differs from `predicted`."""
    return df['actual'].ne(df['predicted']).mean()

def get_misp_pcs(df:pd.DataFrame):
    """Return the `pc` values (as an array) of the rows where `actual` != `predicted`.

    One entry is returned per mispredicted row, so a PC appears as many times
    as it was mispredicted.
    """
    mispredicted = df['actual'].ne(df['predicted'])
    return df['pc'].values[mispredicted]

In [3]:
# Directory name interpolated into the `./{case}/{file}` paths that the
# per-file helpers in this notebook read their CSV traces from.
case = 'tagescl179'

def problematic_pcs(df:pd.DataFrame, n=100):
    """Return the `n` most frequently mispredicted PCs.

    A row counts as a misprediction when `actual` != `predicted`.

    Returns:
        A list of `(pc, misprediction_count)` tuples, most mispredicted first,
        as produced by `Counter.most_common(n)`.
    """
    mispredicted = df['actual'].ne(df['predicted'])
    misp_counts = Counter(df['pc'].values[mispredicted])
    return misp_counts.most_common(n)

def pc_bias(df:pd.DataFrame, pcs:List[str]):
    """Compute the mean of `actual` for each requested PC.

    Args:
        df: frame with at least `pc` and `actual` columns.
        pcs: PCs to evaluate; their position in this list becomes the dict key.

    Returns:
        `{position_in_pcs: mean of df['actual'] over the rows with that pc}`.
    """
    biases = {}
    for rank, pc in enumerate(pcs):
        outcomes = df.loc[df['pc'] == pc, 'actual']
        biases[rank] = outcomes.mean()
    return biases

In [9]:
def load_correlations(file, K=32, warmup=1000):
    """Build one feature record per `inst_type == 0` row from the loads preceding it.

    For every row with `inst_type == 0` (the rows whose `predicted`/`actual`
    are compared here) the `K` rows immediately before it are scanned for rows
    with `inst_type == 1` (called "loads" in this code), and the addresses of
    the most recent five are recorded.

    Args:
        file: path of the CSV trace; it must provide the `inst_type`, `pc`,
            `address`, `predicted` and `actual` columns, with `pc`/`address`
            as hexadecimal strings.
        K: number of preceding rows inspected for loads (default 32).
        warmup: rows whose index is below this value are skipped (default 1000).

    Returns:
        A list of dicts with keys `pc`, `mispred`, `dist_last_load`,
        `load_addr0` .. `load_addr4` (most recent load first; the string
        `'none'` when the window holds fewer loads) and `xor_pc_load0`.
    """
    df = pd.read_csv(file)
    n_load_slots = 5
    records = []
    # NOTE: `i` is the index label from read_csv's default RangeIndex, so it can
    # also be used as a position for `iloc` below.
    for i, row in df[df['inst_type'] == 0].iterrows():
        if i < warmup:
            continue
        window = df.iloc[max(0, i - K):i]
        loads = window[window['inst_type'] == 1]
        last = loads.tail(n_load_slots)['address'].tolist()
        rec = {
            'pc': int(row['pc'], 16),
            'mispred': row['predicted'] != row['actual'],
            # K + 1 marks "no load inside the window"
            'dist_last_load': i - loads.index.max() if not loads.empty else K + 1,
        }
        # slot 0 is the most recent load, slot 1 the one before it, ...
        for slot in range(n_load_slots):
            rec[f'load_addr{slot}'] = int(last[-1 - slot], 16) if len(last) > slot else 'none'
        rec['xor_pc_load0'] = rec['pc'] ^ (rec['load_addr0'] if rec['load_addr0'] != 'none' else 0)
        records.append(rec)
    return records

In [10]:
# Trace to analyse for load/branch correlations.
file = './tagescl/fp_0_trace_branch_misps.csv'

# One dict per retained row of the trace, turned into a frame with one column per dict key.
records = load_correlations(file)
df = pd.DataFrame.from_records(records)

In [None]:
# Rank the load-derived features by mutual information with the `mispred` flag.
brdf = df

cat_cols = ['load_addr0','load_addr1','load_addr2', 'load_addr3','load_addr4']
num_cols = ['dist_last_load','xor_pc_load0']
X = brdf[cat_cols + num_cols].copy()
# The load_addr* columns hold ints mixed with the 'none' sentinel string;
# OneHotEncoder requires each column to be uniformly strings or numbers, so
# every category is cast to str before encoding.
X[cat_cols] = X[cat_cols].astype(str)
y = brdf['mispred']

# One-hot encode the address columns; the two numeric columns pass through unchanged.
enc = OneHotEncoder(handle_unknown='ignore')
ct  = ColumnTransformer([('cat',enc,cat_cols)], remainder='passthrough')
Xenc = ct.fit_transform(X)
# All columns (including the integer-valued passthrough ones) are treated as discrete.
mi = mutual_info_classif(Xenc, y, discrete_features=True)
# Show the 20 features with the highest mutual information.
print(sorted(zip(ct.get_feature_names_out(), mi), key=lambda x:-x[1])[:20])

In [None]:
# Pair every inst_type == 0 row with the addresses of the (up to) three most
# recent inst_type == 1 rows found in the 32 rows preceding it.
file = './tagescl/fp_0_trace_branch_misps.csv'
df = pd.read_csv(file)
# Hex strings -> ints.
# NOTE(review): this assumes every row has a hex string in `address`; confirm
# that rows other than loads are not empty/NaN there.
df['pc'] = df['pc'].apply(lambda x: int(x, 16))
df['address'] = df['address'].apply(lambda x: int(x, 16))
df['next_pc'] = df['next_pc'].apply(lambda x: int(x, 16))

branches = df[df['inst_type'] == 0]
loads = df[df['inst_type'] == 1]
misps = branches[branches['predicted'] != branches['actual']]

window_size = 32
correlated_loads = []
for i, row in branches.iterrows():
    # `i` is a RangeIndex label, so it doubles as a position for iloc.
    window = df.iloc[max(0, i - window_size):i]
    # Separate name so the notebook-level `loads` frame above is not clobbered.
    window_loads = window[window['inst_type'] == 1]
    last = window_loads.tail(3)['address'].tolist()
    # `last` is a flat list of integer addresses, so iterate it directly
    # (tuple-unpacking each element raised TypeError).
    for load_addr in last:
        correlated_loads.append({'PC': row['pc'], 'load_address': load_addr})

## Bias analysis

In [4]:
# List the traces from the same `./{case}/` directory that get_biases reads
# them from, so every listed file is guaranteed to exist where it is opened.
files = [f for f in os.listdir(f'./{case}/') if f.endswith('.csv')]
trace_biases = {}
# Start a local Ray instance only once per kernel.
if not ray.is_initialized():
    ray.init()
    
def get_biases(file):
    """Return the mean `actual` value of the 10 most-mispredicted PCs in one trace.

    Args:
        file: CSV file name inside the `./{case}/` directory.

    Returns:
        `{rank: mean of 'actual'}` as produced by `pc_bias`, where rank 0 is
        the most-mispredicted PC reported by `problematic_pcs`.
    """
    fp = f'./{case}/{file}'
    df = pd.read_csv(fp)
    df = df.drop(['global_hist', 'cycle', 'piece', 'local_hist', 'confidence', 'branch_type'], axis=1)
    # The per-row |next_pc - pc| column that used to be computed here was never
    # read; dropping it avoids parsing every address of the trace in Python.
    pcs = [pc for pc, _ in problematic_pcs(df, 10)]
    return pc_bias(df, pcs)

@ray.remote
def process_file(file):
    """Ray task: compute the biases of one trace and return them tagged with its file name."""
    biases = get_biases(file)
    return file, biases

# Submit one Ray task per trace file.
results = list(map(process_file.remote, files))

# Block until every task has finished, then index the biases by file name.
for file, bias in ray.get(results):
    trace_biases[file] = bias

2025-04-20 13:40:10,224	INFO worker.py:1821 -- Started a local Ray instance.


In [49]:
# trace_biases maps file name -> {rank: bias}; after the transpose each row is
# a trace file and each column a rank.
df = pd.DataFrame(trace_biases).T

def is_biased(value):
    """Tell whether `value` lies outside the closed interval [0.3, 0.7]."""
    lower, upper = 0.3, 0.7
    below = value < lower
    # Same short-circuit as `value < 0.3 or value > 0.7`.
    return below if below else value > upper

# Turn every bias into a boolean "is biased" flag.
# DataFrame.applymap is deprecated in favour of DataFrame.map (pandas >= 2.1);
# fall back to applymap on older pandas versions that lack DataFrame.map.
if hasattr(pd.DataFrame, 'map'):
    df = df.map(is_biased)
else:
    df = df.applymap(is_biased)
df.to_csv('bias.csv', index_label='file')
df

  df = df.applymap(is_biased)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
int_19_trace_branch_misps.csv,True,True,True,True,True,False,True,True,True,True
web_15_trace_branch_misps.csv,True,True,True,False,True,True,False,True,True,False
int_4_trace_branch_misps.csv,False,True,True,False,False,True,False,True,True,False
compress_6_trace_branch_misps.csv,True,True,True,False,True,True,True,True,False,True
infra_6_trace_branch_misps.csv,False,False,True,True,True,True,True,False,True,True
...,...,...,...,...,...,...,...,...,...,...
infra_3_trace_branch_misps.csv,True,True,True,True,True,True,False,True,True,True
infra_5_trace_branch_misps.csv,False,True,True,False,True,True,False,False,False,True
infra_14_trace_branch_misps.csv,False,False,False,False,False,False,True,False,False,False
int_14_trace_branch_misps.csv,True,True,True,True,True,False,True,False,True,True


In [None]:
# How many of the most frequently occurring PCs analyze_file compares the
# most-mispredicted PC against (read there as a global).
n = 10
def most_common_pcs(df, n=3):
    """Return the `n` most frequent values of `df['pc']` as `(pc, count)` tuples, most frequent first."""
    ranked = Counter(df['pc']).most_common(n)
    return [(entry[0], entry[1]) for entry in ranked]

@ray.remote
def analyze_file(file):
    """Ray task: locate a trace's most-mispredicted PC among its most frequent PCs.

    Args:
        file: CSV file name inside the `./{case}/` directory.

    Returns:
        Always a 4-tuple `(file, index, common_pcs, most_misp_pc)`:
        * `index` is the position of the most-mispredicted PC inside the
          `n` most frequent PCs, or None when it is not among them;
        * `common_pcs` is the `(pc, count)` list from `most_common_pcs`;
        * when the trace has no mispredictions at all the tuple is
          `(file, None, "No mispredictions found", None)`, so callers that
          unpack four values keep working.
    """
    fp = f'./{case}/{file}'
    df = pd.read_csv(fp)

    misp_pcs = problematic_pcs(df, 1)
    if not misp_pcs:
        return file, None, "No mispredictions found", None
    most_misp_pc = misp_pcs[0][0]

    common_pcs = most_common_pcs(df, n)
    common_pc_values = [pc for pc, _ in common_pcs]

    index = common_pc_values.index(most_misp_pc) if most_misp_pc in common_pc_values else None
    return file, index, common_pcs, most_misp_pc

# Fan the per-file analysis out to Ray workers and collect the returned tuples.
results = [analyze_file.remote(file) for file in files]
analysis_results = {}

for file, index, common_pcs, most_misp_pc in ray.get(results):
    analysis_results[file] = {
        "index_in_top3": index,
        # analyze_file may put a message string in this slot; keep only real lists
        "top3_common_pcs": common_pcs if isinstance(common_pcs, list) else None,
        "most_mispredicted_pc": most_misp_pc
    }

# Positions (within the n most common PCs) of each file's most mispredicted PC,
# for the files where it appears there at all
indices_distribution = [
    entry["index_in_top3"]
    for entry in analysis_results.values()
    if entry["index_in_top3"] is not None
]
in_top3_count = len(indices_distribution)
print(f"Number of files with most mispredicted PC in top {n} common PCs: {in_top3_count}/{len(files)}")

# Histogram of those positions, printed in ascending position order
index_counts = Counter(indices_distribution)
for index, count in sorted(index_counts.items()):
    print(f"Index {index} (most common PC #{index+1}): {count} files")

# Persist one row per file for the later cells that read this CSV back
results = pd.DataFrame(analysis_results).T
results.to_csv('most_common_vs_mispredicted.csv', index_label='file')

Number of files with most mispredicted PC in top 10 common PCs: 75/105
Index 0 (most common PC #1): 16 files
Index 1 (most common PC #2): 18 files
Index 2 (most common PC #3): 8 files
Index 3 (most common PC #4): 9 files
Index 4 (most common PC #5): 5 files
Index 5 (most common PC #6): 6 files
Index 6 (most common PC #7): 5 files
Index 7 (most common PC #8): 1 files
Index 8 (most common PC #9): 5 files
Index 9 (most common PC #10): 2 files


In [None]:
# CSV file names found in the directory of the currently selected `case`.
files = [f for f in os.listdir(f'./{case}/') if f.endswith('.csv')]

@ray.remote
def process_file_bias(file):
    """Ray task: summarise the 10 most-mispredicted PCs of one trace as a flat row.

    Returns:
        A dict with `file` plus, for each rank k starting at 1, the keys
        `pc{k}`, `bias{k}` (mean of `actual`) and `misp_rate{k}`.
        If anything fails the error is printed and only `{'file': file}` is
        returned, so one bad trace does not abort the whole batch.
    """
    fp = f'./{case}/{file}'
    try:
        df = pd.read_csv(fp)
        df = df.drop(['global_hist', 'cycle', 'piece', 'local_hist', 'confidence', 'branch_type'], axis=1)
        pcs_list = [pc for pc, _ in problematic_pcs(df, 10)]
        biases = pc_bias(df, pcs_list)

        row = {'file': file}
        for rank, pc in enumerate(pcs_list, start=1):
            pc_df = df[df['pc'] == pc]
            row[f'pc{rank}'] = pc
            # pc_bias keys are zero-based positions in pcs_list
            row[f'bias{rank}'] = biases[rank - 1]
            row[f'misp_rate{rank}'] = misp_rate(pc_df)
        return row
    except Exception as e:
        print(f"Error processing {file}: {e}")
        return {'file': file}

# Submit one Ray task per file and wait for all the row dicts.
results = [process_file_bias.remote(file) for file in files]
results_list = ray.get(results)

# One row per file; files that failed only carry the `file` column.
result_df = pd.DataFrame(results_list)
result_df

88


Unnamed: 0,file,pc1,bias1,misp_rate1,pc2,bias2,misp_rate2,pc3,bias3,misp_rate3,...,misp_rate7,pc8,bias8,misp_rate8,pc9,bias9,misp_rate9,pc10,bias10,misp_rate10
0,int_19_trace_branch_misps.csv,0x59cbdc,0.243794,0.076328,0x59ccc8,0.215302,0.076075,0x59b6a4,0.742767,0.095069,...,0.116562,0x59b6b8,0.147059,0.076018,0x59b590,0.145805,0.037940,0x59e7b8,0.756612,0.061368
1,web_15_trace_branch_misps.csv,0xaaaab1a14cf4,0.272245,0.165411,0xaaaab17dc5f0,0.257697,0.199494,0xaaaab1deeee8,0.138799,0.133765,...,0.170878,0xaaaab17dc5f8,0.825773,0.131443,0xaaaab17e1d44,0.857865,0.142697,0xaaaab2ab6234,0.696925,0.123512
2,int_4_trace_branch_misps.csv,0xfffff1dcb9d4,0.494381,0.159058,0xfffff1ef2640,0.201211,0.143001,0xfffff1ef2f48,0.232158,0.105367,...,0.189789,0xfffff1ef3070,0.715757,0.234179,0xfffff1ef2f54,0.786761,0.091134,0xfffff1ef31a0,0.526859,0.139010
3,compress_6_trace_branch_misps.csv,0xffffdfe32454,0.074030,0.063815,0xffffdfe325bc,0.906193,0.051750,0xffffdfe32484,0.032187,0.031227,...,0.135581,0xffffdfe32494,0.007508,0.006844,0xffffdfe32948,0.409760,0.361219,0xffffdfe31de0,0.964167,0.036747
4,infra_6_trace_branch_misps.csv,0x4000006b001c,0.501182,0.108066,0x4000006aff94,0.525531,0.088764,0x4000006afe94,0.850807,0.044732,...,0.064763,0x4000006beef8,0.648028,0.075066,0x4000006b22f8,0.765585,0.135590,0x417ba0,0.225228,0.009559
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,infra_3_trace_branch_misps.csv,0x40000012429c,0.747397,0.088580,0x400000124280,0.837795,0.032367,0x400000124288,0.157452,0.048901,...,0.192179,0x4000006b3724,0.968675,0.008619,0x417ba0,0.180733,0.027068,0x5b7480,0.862673,0.107962
101,infra_5_trace_branch_misps.csv,0x4000006afe94,0.461002,0.076887,0x4000006aff94,0.963717,0.034600,0x4000006b001c,0.815076,0.026794,...,0.494424,0x4000006befdc,0.591522,0.250482,0x4000006bef78,0.662069,0.337931,0x4000006afe9c,0.999931,0.000078
102,infra_14_trace_branch_misps.csv,0x400007e8b608,0.506078,0.404859,0x400007e8b4ac,0.546322,0.433960,0x400007e8b5c4,0.465927,0.407275,...,0.216762,0x400007d20690,0.562413,0.441048,0x400007db6128,0.396967,0.402776,0x400007d24400,0.527576,0.411376
103,int_14_trace_branch_misps.csv,0x4370a4,0.981677,0.016023,0x44d0bc,0.979037,0.002940,0x4480b8,0.021412,0.002093,...,0.037450,0x2ed2d0,0.612839,0.217653,0x2eba5c,0.076570,0.030628,0x2ecf7c,0.961543,0.022841


In [5]:
# Reload the saved per-file analysis and keep the files whose most-mispredicted
# PC sits at position 0 or 1 of the most common PCs.
mcdf = (
    pd.read_csv('most_common_vs_mispredicted.csv')
    .loc[lambda frame: frame['index_in_top3'].isin([0.0, 1.0])]
)
mcdf

Unnamed: 0,file,index_in_top3,top3_common_pcs,most_mispredicted_pc
3,compress_6_trace_branch_misps.csv,0.0,"[('0xffffdfe32454', 215372), ('0xffffdfe3248c'...",0xffffdfe32454
5,fp_11_trace_branch_misps.csv,0.0,"[('0x4f70dc', 246240), ('0x4f317c', 234584), (...",0x4f70dc
6,compress_1_trace_branch_misps.csv,0.0,"[('0xffffdfe32454', 184505), ('0xffffdfe3248c'...",0xffffdfe32454
7,int_30_trace_branch_misps.csv,0.0,"[('0x399a78', 104266), ('0x39d2d8', 79333), ('...",0x399a78
15,int_11_trace_branch_misps.csv,1.0,"[('0xfffff2277e34', 295026), ('0xfffff2274e18'...",0xfffff2274e18
17,web_14_trace_branch_misps.csv,0.0,"[('0xaaaab2f68c60', 1725240), ('0xaaaab2f68c44...",0xaaaab2f68c60
18,int_29_trace_branch_misps.csv,0.0,"[('0x399a78', 112046), ('0x39d2ec', 81616), ('...",0x399a78
24,int_34_trace_branch_misps.csv,1.0,"[('0x2a243c', 1086130), ('0x2a2450', 1086128),...",0x2a2450
26,int_5_trace_branch_misps.csv,1.0,"[('0xfffff1f29ca4', 194881), ('0xfffff1f29d18'...",0xfffff1f29d18
28,int_36_trace_branch_misps.csv,0.0,"[('0x7ca302b538', 71540), ('0xffffff8008eba120...",0x7ca302b538


In [5]:
# File names that survived the filter applied to mcdf.
print(mcdf['file'].values)

['compress_6_trace_branch_misps.csv' 'fp_11_trace_branch_misps.csv'
 'compress_1_trace_branch_misps.csv' 'int_30_trace_branch_misps.csv'
 'int_11_trace_branch_misps.csv' 'web_14_trace_branch_misps.csv'
 'int_29_trace_branch_misps.csv' 'int_34_trace_branch_misps.csv'
 'int_5_trace_branch_misps.csv' 'int_36_trace_branch_misps.csv'
 'infra_8_trace_branch_misps.csv' 'fp_0_trace_branch_misps.csv'
 'fp_4_trace_branch_misps.csv' 'compress_5_trace_branch_misps.csv'
 'fp_1_trace_branch_misps.csv' 'infra_7_trace_branch_misps.csv'
 'int_1_trace_branch_misps.csv' 'int_7_trace_branch_misps.csv'
 'compress_3_trace_branch_misps.csv' 'infra_2_trace_branch_misps.csv'
 'int_6_trace_branch_misps.csv' 'int_3_trace_branch_misps.csv'
 'infra_9_trace_branch_misps.csv' 'int_32_trace_branch_misps.csv'
 'int_2_trace_branch_misps.csv' 'fp_2_trace_branch_misps.csv'
 'web_24_trace_branch_misps.csv' 'fp_3_trace_branch_misps.csv'
 'int_21_trace_branch_misps.csv' 'compress_2_trace_branch_misps.csv'
 'compress_7_trace

In [6]:
# Find the taken/not taken bias for all files
# Print the mean `actual` value of the most-mispredicted PC of every file kept in mcdf
for file in mcdf['file'].values:
    fp = f'./{case}/{file}'
    print(fp)
    df = pd.read_csv(fp)
    df = df.drop(columns=['global_hist', 'cycle', 'piece', 'local_hist', 'confidence', 'branch_type'])
    pcs = [pc for pc, _ in problematic_pcs(df, 10)]
    biases = pc_bias(df, pcs)

    # Key 0 is the PC with the most mispredictions
    print(f"File: {file}, Taken Bias: {biases[0]}")

./tagescl192/compress_6_trace_branch_misps.csv
File: compress_6_trace_branch_misps.csv, Taken Bias: 0.07403005033151942
./tagescl192/fp_11_trace_branch_misps.csv
File: fp_11_trace_branch_misps.csv, Taken Bias: 0.4983715074723847
./tagescl192/compress_1_trace_branch_misps.csv
File: compress_1_trace_branch_misps.csv, Taken Bias: 0.0650443077423376
./tagescl192/int_30_trace_branch_misps.csv
File: int_30_trace_branch_misps.csv, Taken Bias: 0.6136516218134387
./tagescl192/int_11_trace_branch_misps.csv
File: int_11_trace_branch_misps.csv, Taken Bias: 0.18606686957035637
./tagescl192/web_14_trace_branch_misps.csv
File: web_14_trace_branch_misps.csv, Taken Bias: 0.7130833970925784
./tagescl192/int_29_trace_branch_misps.csv
File: int_29_trace_branch_misps.csv, Taken Bias: 0.533959266729736
./tagescl192/int_34_trace_branch_misps.csv
File: int_34_trace_branch_misps.csv, Taken Bias: 0.9446787119013597
./tagescl192/int_5_trace_branch_misps.csv
File: int_5_trace_branch_misps.csv, Taken Bias: 0.21415

In [4]:
# Load one trace, discard the columns not needed here, and flag mispredicted rows.
df = (
    pd.read_csv('./tagescl192/web_14_trace_branch_misps.csv')
    .drop(columns=['global_hist', 'cycle', 'piece', 'local_hist', 'confidence', 'branch_type'])
    .assign(misp=lambda frame: frame['actual'] != frame['predicted'])
)
df

Unnamed: 0,seq_no,pc,next_pc,actual,predicted,misp
0,21,0x80001aac,0x80001b74,1,0,True
1,127,0xaaaab2f694fc,0xaaaab2f69500,0,0,False
2,134,0xaaaab2f69518,0xaaaab2f6951c,0,0,False
3,143,0xaaaab2f693f8,0xaaaab2f693fc,0,0,False
4,148,0xaaaab2f6940c,0xaaaab2f69444,1,0,True
...,...,...,...,...,...,...
11059732,77409002,0xffffdeed9084,0xffffdeed9088,0,0,False
11059733,77409067,0xaaaaad3223b8,0xaaaaad3223bc,0,0,False
11059734,77409143,0xaaaaad302850,0xaaaaad302868,1,1,False
11059735,77409209,0xaaaaad3026bc,0xaaaaad302630,1,1,False


In [5]:
df = (
    pd.read_csv('./tagescl179/int_14_trace_branch_misps.csv')
    .drop(columns=['global_hist', 'cycle', 'piece', 'local_hist', 'confidence', 'branch_type'])
    # Misprediction indicator per row
    .assign(misp=lambda frame: frame['actual'] != frame['predicted'])
)

# Per-PC statistics
per_pc_aggregations = {
    'actual': 'mean',  # bias: mean of `actual` for the PC
    'misp': 'mean',    # misprediction rate
    'pc': 'count',     # number of occurrences
}
branch_stats = (
    df.groupby('pc')
    .agg(per_pc_aggregations)
    .rename(columns={'pc': 'count', 'actual': 'bias'})
)

# Keep PCs whose bias is above 0.9 or below 0.1, most frequent first
biased_branches = (
    branch_stats
    .loc[lambda stats: (stats['bias'] > 0.9) | (stats['bias'] < 0.1)]
    .sort_values('count', ascending=False)
)

print(f"Found {len(biased_branches)} highly biased branches")
biased_branches

Found 2228 highly biased branches


Unnamed: 0_level_0,bias,misp,count
pc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0x437094,0.000117,0.000142,470877
0x4370a4,0.981677,0.016032,470822
0x44d010,0.000000,0.000007,410490
0x44d01c,1.000000,0.000002,410490
0x44d060,1.000000,0.000005,410490
...,...,...,...
0xffff80000821c900,1.000000,0.000000,1
0x2f3f04,0.000000,0.000000,1
0xffff8000082079c8,1.000000,0.000000,1
0xffff80000853338c,0.000000,0.000000,1


In [10]:
# Collect, for the most-mispredicted PC of the trace, the rows leading up to
# each of its mispredictions.
history_before_misps = []
for file in ['compress_0_trace_branch_misps.csv']:
    fp = f'./tagescl192/{file}'
    df = pd.read_csv(fp)
    df = df.drop(['global_hist', 'cycle', 'piece', 'local_hist', 'confidence', 'branch_type'], axis=1)
    # Absolute distance between a row's pc and its next_pc
    df['delta'] = (df['next_pc'].apply(int, base=16).astype(int) - 
                   df['pc'].apply(int, base=16).astype(int)).abs()
    pcs = problematic_pcs(df)
    if not pcs:
        # Trace without any misprediction: nothing to analyse
        continue
    # Computing statistics for the first, most mispredicted PC
    pc, count = pcs[0]
    
    misp_indices = df[(df['pc'] == pc) & (df['actual'] != df['predicted'])].index
    filtered_df = df[df['pc'] == pc]
    
    print(f"Bias: {filtered_df['actual'].mean()}")
    
    for i, idx in enumerate(misp_indices):
        if idx > 500:  # Skip first 500 rows
            # For the first misprediction there is no previous one; without this
            # guard misp_indices[-1] (the LAST misprediction) would be used and
            # the slice would come out empty or wrong.
            prev_idx = misp_indices[i - 1] if i > 0 else idx
            # Label-based slice: from 100 rows before the previous misprediction
            # up to and including the current one.
            history = df.loc[max(0, prev_idx - 100):idx]
            history_before_misps.append(history)
filtered_df

Bias: 0.9015582358262855


Unnamed: 0,seq_no,pc,next_pc,actual,predicted,delta
1,130,0xffffdfdf0814,0xffffdfdf07a0,1,0,116
2,161,0xffffdfdf0814,0xffffdfdf07a0,1,1,116
3,192,0xffffdfdf0814,0xffffdfdf07a0,1,1,116
4,223,0xffffdfdf0814,0xffffdfdf07a0,1,1,116
5,254,0xffffdfdf0814,0xffffdfdf07a0,1,1,116
...,...,...,...,...,...,...
11747776,126275456,0xffffdfdf0814,0xffffdfdf0818,0,1,4
11747861,126276075,0xffffdfdf0814,0xffffdfdf07a0,1,1,116
11747862,126276106,0xffffdfdf0814,0xffffdfdf07a0,1,1,116
11747863,126276137,0xffffdfdf0814,0xffffdfdf07a0,1,1,116


In [11]:
# Collect, for the most-mispredicted PC of the trace, the rows leading up to
# each of its mispredictions.
history_before_misps = []
for file in ['int_7_trace_branch_misps.csv']:
    fp = f'./tagescl192/{file}'
    df = pd.read_csv(fp)
    df = df.drop(['global_hist', 'cycle', 'piece', 'local_hist', 'confidence', 'branch_type'], axis=1)
    # Absolute distance between a row's pc and its next_pc
    df['delta'] = (df['next_pc'].apply(int, base=16).astype(int) - 
                   df['pc'].apply(int, base=16).astype(int)).abs()
    pcs = problematic_pcs(df)
    print(pcs)
    if not pcs:
        # Trace without any misprediction: nothing to analyse
        continue
    # Computing statistics for the first, most mispredicted PC
    pc, count = pcs[0]
    
    misp_indices = df[(df['pc'] == pc) & (df['actual'] != df['predicted'])].index
    filtered_df = df[df['pc'] == pc]
    
    print(f"Bias: {filtered_df['actual'].mean()}")
    
    for i, idx in enumerate(misp_indices):
        if idx > 500:  # Skip first 500 rows
            # For the first misprediction there is no previous one; without this
            # guard misp_indices[-1] (the LAST misprediction) would be used and
            # the slice would come out empty or wrong.
            prev_idx = misp_indices[i - 1] if i > 0 else idx
            # Label-based slice: from 100 rows before the previous misprediction
            # up to and including the current one.
            history = df.loc[max(0, prev_idx - 100):idx]
            history_before_misps.append(history)
filtered_df

[('0xfffff336e460', 3550), ('0xfffff33b813c', 1871), ('0xfffff3242de8', 1493), ('0xfffff33b88c0', 1472), ('0xfffff3044d60', 1227), ('0xfffff33fdc2c', 1187), ('0xfffff33b8128', 1149), ('0xfffff336e448', 1057), ('0xfffff0fbae64', 1022), ('0xfffff33ff20c', 960), ('0xfffff33b88a4', 932), ('0xfffff33693d0', 879), ('0xfffff3242e60', 844), ('0xfffff33693e0', 822), ('0xfffff3069994', 768), ('0xfffff3237b5c', 736), ('0xfffff3074548', 704), ('0xfffff3242fec', 652), ('0xfffff31d81f4', 633), ('0xfffff33b8164', 628), ('0xfffff33b8750', 628), ('0xfffff342976c', 608), ('0xfffff336930c', 588), ('0xfffff3065044', 582), ('0xfffff3242e88', 545), ('0xfffff3038fe4', 527), ('0xffffd48a8ce0', 526), ('0xfffff2e0f558', 520), ('0xfffff342a118', 493), ('0xfffff3027670', 491), ('0xfffff3243038', 489), ('0xfffff306512c', 488), ('0xfffff33fd4d4', 484), ('0xfffff34fb228', 479), ('0xfffff30181c8', 474), ('0xfffff2e0e4d4', 472), ('0xfffff302c174', 448), ('0xfffff34400b0', 446), ('0xfffff34403f8', 444), ('0xfffff2e0e7e

Unnamed: 0,seq_no,pc,next_pc,actual,predicted,delta
1031,9308,0xfffff336e460,0xfffff336e464,0,0,4
1094,9965,0xfffff336e460,0xfffff336e440,1,0,32
1096,9975,0xfffff336e460,0xfffff336e464,0,0,4
1144,10471,0xfffff336e460,0xfffff336e440,1,1,32
1146,10481,0xfffff336e460,0xfffff336e440,1,0,32
...,...,...,...,...,...,...
5551601,43024656,0xfffff336e460,0xfffff336e440,1,1,32
5551603,43024666,0xfffff336e460,0xfffff336e440,1,1,32
5551605,43024676,0xfffff336e460,0xfffff336e440,1,1,32
5551607,43024686,0xfffff336e460,0xfffff336e440,1,1,32


In [12]:
history_before_misps = []
for file in ['infra_2_trace_branch_misps.csv']:
    fp = f'./tagescl192/{file}'
    df = pd.read_csv(fp)
    df = df.drop(['global_hist', 'cycle', 'piece', 'local_hist', 'confidence', 'branch_type'], axis=1)
    df['delta'] = (df['next_pc'].apply(int, base=16).astype(int) - 
                   df['pc'].apply(int, base=16).astype(int)).abs()
    pcs = problematic_pcs(df)
    print(pcs)
    # Computing statistics for the first, most mispredicted PC
    pc, count = pcs[0]
    
    misp_indices = df[(df['pc'] == pc) & (df['actual'] != df['predicted'])].index
    filtered_df = df[df['pc'] == pc]
    
    print(f"Bias: {filtered_df['actual'].mean()}")
    
    for i, idx in enumerate(misp_indices):
        if idx > 500:  # Skip first 500 rows
            history = df.loc[misp_indices[i - 1] - 100:idx]
            history_before_misps.append(history)
filtered_df

[('0x4000006befdc', 8229), ('0x40000015f870', 1757), ('0x4000006b1d14', 1692), ('0x4000006b229c', 1495), ('0x4000006b2224', 705), ('0x4000006b2290', 500), ('0x4000006b2704', 406), ('0x4000006b22f8', 367), ('0x4000006b28b8', 247), ('0x4000006afe94', 203), ('0x4000006b2910', 197), ('0x4000006b1d20', 165), ('0x4000006b223c', 135), ('0x4000000190f0', 82), ('0x40000000a054', 77), ('0x4000006b02ec', 77), ('0x4000006b28e8', 76), ('0x4000000098b0', 70), ('0x4000006b2058', 67), ('0x400000009664', 66), ('0x40000001913c', 54), ('0x4000006b25fc', 51), ('0x4000006b21fc', 47), ('0xffff0000083b45bc', 44), ('0xffff0000083bc150', 37), ('0x4000006b2594', 33), ('0x4000006b02a0', 29), ('0x4000006b2050', 24), ('0x40000000989c', 22), ('0xffff0000085bb0e4', 22), ('0x4000000097f8', 21), ('0x40000001914c', 19), ('0x400000019114', 18), ('0xffff0000085c9ca4', 18), ('0xffff0000084d3f60', 18), ('0x400000019078', 17), ('0xffff0000080834f4', 17), ('0xffff0000085bb0d8', 17), ('0xffff000008185094', 17), ('0xffff000008

Unnamed: 0,seq_no,pc,next_pc,actual,predicted,delta
27601,191194,0x4000006befdc,0x4000006befc0,1,0,28
27602,191211,0x4000006befdc,0x4000006befc0,1,1,28
27603,191227,0x4000006befdc,0x4000006befc0,1,1,28
27604,191244,0x4000006befdc,0x4000006befc0,1,1,28
27605,191261,0x4000006befdc,0x4000006befc0,1,1,28
...,...,...,...,...,...,...
5785701,76226250,0x4000006befdc,0x4000006befc0,1,1,28
5785702,76226267,0x4000006befdc,0x4000006befc0,1,1,28
5785703,76226284,0x4000006befdc,0x4000006befc0,1,1,28
5785704,76226301,0x4000006befdc,0x4000006befc0,1,1,28


In [None]:
# Same trace under two output directories: print each overall misprediction
# rate and keep only the rows belonging to the PCs returned by problematic_pcs.
for file in ['compress_0_trace_branch_misps.csv']:
    # --- ./tagescl192 ---
    fp = f'./tagescl192/{file}'
    df = pd.read_csv(fp)
    pcs = problematic_pcs(df)
    print(misp_rate(df))
    pc_values = [entry[0] for entry in pcs]
    filtered_df = df.loc[df['pc'].isin(pc_values)].copy()
    target_addr = filtered_df['next_pc'].apply(int, base=16).astype(int)
    branch_addr = filtered_df['pc'].apply(int, base=16).astype(int)
    filtered_df['delta'] = (target_addr - branch_addr).abs()
    filtered_df = filtered_df.drop(columns=['global_hist', 'cycle', 'piece', 'local_hist', 'confidence', 'branch_type'])
    filtered_df['mispredictions'] = filtered_df['actual'].ne(filtered_df['predicted'])

    # --- ./tagescl ---
    fp = f'./tagescl/{file}'
    df2 = pd.read_csv(fp)
    pcs2 = problematic_pcs(df2)
    print(misp_rate(df2))
    pc_values = [entry[0] for entry in pcs2]
    filtered_df2 = df2.loc[df2['pc'].isin(pc_values)].copy()
    target_addr = filtered_df2['next_pc'].apply(int, base=16).astype(int)
    branch_addr = filtered_df2['pc'].apply(int, base=16).astype(int)
    filtered_df2['delta'] = (target_addr - branch_addr).abs()
    filtered_df2 = filtered_df2.drop(columns=['global_hist', 'cycle', 'piece', 'local_hist', 'confidence', 'branch_type'])
    filtered_df2['mispredictions'] = filtered_df2['actual'].ne(filtered_df2['predicted'])
    break
df

0.03288858623368056
0.033382122298713596


Unnamed: 0,seq_no,piece,pc,next_pc,branch_type,actual,predicted,global_hist,cycle,local_hist,confidence
0,21,0,0x80001984,0x80001a60,1,1,0,0x0,913,0x0,0
1,130,0,0xffffdfdf0814,0xffffdfdf07a0,1,1,0,0x0,1690,0x0,0
2,161,0,0xffffdfdf0814,0xffffdfdf07a0,1,1,1,0x0,1981,0x0,0
3,192,0,0xffffdfdf0814,0xffffdfdf07a0,1,1,1,0x0,1983,0x0,0
4,223,0,0xffffdfdf0814,0xffffdfdf07a0,1,1,1,0x0,1985,0x0,0
...,...,...,...,...,...,...,...,...,...,...,...
11747870,126276246,0,0xffffdfdf08fc,0xffffdfdf0900,1,0,0,0x0,25482926,0x0,0
11747871,126276248,0,0xffffdfdf0904,0xffffdfdf0908,1,0,0,0x0,25482927,0x0,0
11747872,126276251,0,0xffffdfdf0910,0xffffdfdf08c4,1,1,1,0x0,25482927,0x0,0
11747873,126276260,0,0xffffdfdf08e0,0xffffdfdf08e4,1,0,0,0x0,25482928,0x0,0


In [None]:
# Same trace under two output directories: print each overall misprediction
# rate and keep only the rows belonging to the PCs returned by problematic_pcs.
for file in ['int_0_trace_branch_misps.csv']:
    # --- ./tage ---
    fp = f'./tage/{file}'
    df = pd.read_csv(fp)
    pcs = problematic_pcs(df)
    print(misp_rate(df))
    pc_values = [entry[0] for entry in pcs]
    filtered_df = df.loc[df['pc'].isin(pc_values)].copy()
    target_addr = filtered_df['next_pc'].apply(int, base=16).astype(int)
    branch_addr = filtered_df['pc'].apply(int, base=16).astype(int)
    filtered_df['delta'] = (target_addr - branch_addr).abs()
    filtered_df = filtered_df.drop(columns=['global_hist', 'cycle', 'piece', 'local_hist', 'confidence', 'branch_type'])
    filtered_df['mispredictions'] = filtered_df['actual'].ne(filtered_df['predicted'])

    # --- ./tagescl ---
    fp = f'./tagescl/{file}'
    df2 = pd.read_csv(fp)
    pcs2 = problematic_pcs(df2)
    print(misp_rate(df2))
    pc_values = [entry[0] for entry in pcs2]
    filtered_df2 = df2.loc[df2['pc'].isin(pc_values)].copy()
    target_addr = filtered_df2['next_pc'].apply(int, base=16).astype(int)
    branch_addr = filtered_df2['pc'].apply(int, base=16).astype(int)
    filtered_df2['delta'] = (target_addr - branch_addr).abs()
    filtered_df2 = filtered_df2.drop(columns=['global_hist', 'cycle', 'piece', 'local_hist', 'confidence', 'branch_type'])
    filtered_df2['mispredictions'] = filtered_df2['actual'].ne(filtered_df2['predicted'])
    break
df2

0.03468934715137472
0.032990560421257975


Unnamed: 0,seq_no,pc,next_pc,actual,predicted,delta,mispredictions
467,10749,0xffff800008523d88,0xffff800008523d7c,1,0,12,1
468,10753,0xffff800008523d88,0xffff800008523d7c,1,1,12,0
469,10757,0xffff800008523d88,0xffff800008523d7c,1,1,12,0
470,10761,0xffff800008523d88,0xffff800008523d7c,1,1,12,0
471,10765,0xffff800008523d88,0xffff800008523d7c,1,1,12,0
...,...,...,...,...,...,...,...
6223259,41766604,0xfffff21fb738,0xfffff21fb73c,0,0,4,0
6223260,41766610,0xfffff21fb750,0xfffff21fb730,1,1,32,0
6223261,41766613,0xfffff21fb738,0xfffff21fb73c,0,0,4,0
6223262,41766619,0xfffff21fb750,0xfffff21fb730,1,1,32,0


In [36]:
# Show the (pc, misprediction count) lists currently held in `pcs` and `pcs2`.
# NOTE(review): both names come from whichever cell ran last in the kernel — confirm before relying on this output.
print(pcs)
print(pcs2)

[('0xfffff21fcc90', 23217), ('0xfffff21fe164', 19192), ('0xfffff21fb750', 17825), ('0xfffff21fcca8', 13313), ('0xfffff21fae30', 13241), ('0xfffff21fe17c', 11241), ('0xfffff21fb768', 11214), ('0xfffff21fcc78', 10479), ('0xfffff21fcd08', 9876), ('0xfffff21fb738', 9521), ('0xfffff21fe14c', 8784), ('0xfffff21fe1dc', 8525), ('0xfffff21fc430', 7672), ('0xfffff21fb7c8', 6848), ('0xfffff21faf04', 6642), ('0xfffff21fd904', 6182), ('0xfffff21fbb58', 4014), ('0xfffff21faee8', 3340), ('0xfffff21fc414', 2834), ('0xfffff21fd8e8', 2654), ('0xfffff21faec4', 2615), ('0xfffff21fd0a0', 2458), ('0xfffff21fcd70', 2118), ('0xfffff21fe244', 1869), ('0xfffff21fb830', 1580), ('0xfffff21fe574', 1226), ('0xfffff21fd8bc', 842), ('0xfffff21fbb94', 737), ('0xfffff21fc3e8', 567), ('0xfffff21fa6e4', 430), ('0xfffff21fbb50', 366), ('0xfffff21fe7e8', 318), ('0xfffff21fd0b4', 318), ('0xfffff21fbba0', 299), ('0xfffff21fbbec', 287), ('0xfffff21fd0c8', 272), ('0xfffff21fa778', 191), ('0xfffff21fbbd8', 175), ('0xfffff21fc43

In [10]:
# PC of the first entry of `pcs` (the first element of its (pc, count) tuple).
print(pcs[0][0])

0xffffdfdf0814
