<a href="https://colab.research.google.com/github/pedroblossbraga/computer-architecture/blob/main/cache_simulator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import pandas as pd
import numpy as np
import random
from IPython.display import display
from collections import deque, Counter

def parse_address(hex_address, offset_bits, index_bits, address_bits=32):
    """Split a hexadecimal address into tag, index and offset bit strings.

    Args:
        hex_address: Address as a hex string, with or without a ``0x`` prefix.
        offset_bits: Number of low-order bits that address a byte in a line.
        index_bits: Number of bits that select the set (0 when there is a
            single set, e.g. a fully associative cache).
        address_bits: Width the binary string is zero-padded to. Addresses
            that need more bits keep all of them; the extra high bits become
            part of the tag.

    Returns:
        Tuple ``(address_bin, tag, index, offset)`` of '0'/'1' strings.
        ``index`` and ``offset`` are empty strings when their width is 0.

    Raises:
        ValueError: If ``hex_address`` is not valid hexadecimal or is negative.
    """
    value = int(hex_address, 16)
    if value < 0:
        raise ValueError(f"Address must be non-negative, got {hex_address!r}")
    address_bin = bin(value)[2:].zfill(address_bits)

    # Slice with positive end positions: a negative slice such as [-k:-0]
    # collapses to an empty string when offset_bits is 0.
    index_end = len(address_bin) - offset_bits
    tag_end = index_end - index_bits

    offset = address_bin[index_end:]
    index = address_bin[tag_end:index_end]
    tag = address_bin[:tag_end]

    return address_bin, tag, index, offset


def _exact_log2(value, what):
    """Return log2(value) for a positive power of two, else raise ValueError."""
    if value < 1 or value & (value - 1):
        raise ValueError(f"{what} must be a positive power of two, got {value}")
    return int(value).bit_length() - 1


def _cache_geometry(organization, num_lines):
    """Return ``(num_sets, ways)`` for the requested cache organization."""
    if organization == 'direct':
        return num_lines, 1
    if organization == 'fully':
        return 1, num_lines
    if organization.startswith('set'):
        try:
            ways = int(organization.split('-')[-1])  # e.g. set-2 for 2-way
        except ValueError as err:
            raise ValueError(
                f"Set-associative organization must look like 'set-N', got {organization!r}"
            ) from err
        if not 1 <= ways <= num_lines:
            raise ValueError(
                f"Associativity must be between 1 and {num_lines}, got {ways}"
            )
        return num_lines // ways, ways
    raise ValueError("Unknown organization")


def _choose_victim(replacement, order, freq):
    """Pick the resident tag to evict from a full set.

    ``order`` lists resident tags oldest-first (recency order under LRU,
    insertion order otherwise); ``freq`` maps each resident tag to its access
    count since it was loaded.
    """
    if replacement in ('LRU', 'FIFO'):
        return order[0]
    if replacement == 'LFU':
        # min() returns the first minimum, so ties go to the oldest line.
        return min(order, key=freq.__getitem__)
    if replacement == 'Random':
        return random.choice(list(order))
    raise ValueError("Unknown replacement strategy")


def simulate_cache(addresses, organization, replacement, cache_size_bytes=128, line_size=16, address_bits=32):
    """Simulate a cache over a trace of accesses and report hits and misses.

    Every access is printed as it is processed and also recorded as one row of
    the returned DataFrame.

    Args:
        addresses: Iterable of strings whose last whitespace-separated token
            is a hex address, e.g. ``"LOAD 0x110C"``.
        organization: ``'direct'``, ``'fully'`` or ``'set-N'`` (N-way).
        replacement: ``'LRU'``, ``'FIFO'``, ``'LFU'`` or ``'Random'``. It is
            only consulted when a line must be evicted from an associative
            set; direct-mapped caches ignore it.
        cache_size_bytes: Total cache capacity in bytes.
        line_size: Bytes per cache line; must be a power of two.
        address_bits: Address width used when splitting addresses into fields.

    Returns:
        pandas.DataFrame with columns Iteration, Hex Address, Binary, Tag,
        Index, Offset, Location and Hit/Miss (one row per access).

    Raises:
        ValueError: For an unknown organization, an unknown replacement
            strategy at eviction time, or a geometry whose line size or set
            count is not a positive power of two.
    """
    offset_bits = _exact_log2(line_size, "line_size")
    num_lines = cache_size_bytes // line_size
    if num_lines < 1:
        raise ValueError("cache_size_bytes must hold at least one line")

    # Direct-mapped and fully associative are the two extremes of a
    # set-associative cache, so one code path serves all three.
    num_sets, ways = _cache_geometry(organization, num_lines)
    index_bits = _exact_log2(num_sets, "Number of sets")

    resident_tags = [[] for _ in range(num_sets)]   # slot -> tag, per set
    eviction_order = [deque() for _ in range(num_sets)]
    access_counts = [Counter() for _ in range(num_sets)]

    columns = ['Iteration', 'Hex Address', 'Binary', 'Tag',
               'Index', 'Offset', 'Location', 'Hit/Miss']
    df_rows = []

    for i, addr in enumerate(addresses):
        hex_addr = addr.split()[-1]
        _, tag, index, offset = parse_address(hex_addr, offset_bits, index_bits, address_bits)

        # Display width follows the number of hex digits actually written,
        # independent of the mnemonic that precedes the address.
        digits = hex_addr[2:] if hex_addr[:2].lower() == '0x' else hex_addr
        shown_bits = 4 * len(digits)
        bits = format(int(hex_addr, 16), f'0{shown_bits}b')
        bin_addr_grouped = ' '.join(bits[k:k + 4] for k in range(0, len(bits), 4))
        shown_tag_bits = shown_bits - index_bits - offset_bits
        tag_shown = tag[-shown_tag_bits:] if shown_tag_bits > 0 else ''

        print(f"\nInstruction {i+1}: {addr}")
        print(f"Hex Address: {hex_addr}")
        print(f"Binary Address: {bin_addr_grouped}")
        print(f"Tag: {tag_shown if tag_shown else '-'} | Index: {index if index else '-'} | Offset: {offset if offset else '-'}")

        # An empty index means there is only one set.
        set_idx = int(index, 2) if index else 0
        resident = resident_tags[set_idx]
        order = eviction_order[set_idx]
        freq = access_counts[set_idx]

        hit = tag in resident
        if hit:
            line_idx = resident.index(tag)
            if replacement == 'LRU':
                # Move to the most-recently-used end.
                order.remove(tag)
                order.append(tag)
        elif len(resident) < ways:
            resident.append(tag)
            line_idx = len(resident) - 1
            order.append(tag)
        else:
            if organization == 'direct':
                victim = resident[0]  # only one candidate; policy not needed
            else:
                victim = _choose_victim(replacement, order, freq)
            order.remove(victim)
            del freq[victim]
            line_idx = resident.index(victim)
            resident[line_idx] = tag
            order.append(tag)
        freq[tag] += 1

        if organization == 'fully':
            location = f"Line {line_idx}"
        else:
            location = f"Set {set_idx}, Line {line_idx}"

        print(f"Result: {'Hit ✅' if hit else 'Miss ❌'} — Placed in {location}")

        df_rows.append({
            'Iteration': i+1,
            'Hex Address': hex_addr,
            'Binary': bin_addr_grouped,
            'Tag': tag_shown,
            'Index': index,
            'Offset': offset,
            'Location': location,
            'Hit/Miss': 'Hit' if hit else 'Miss'
        })
    return pd.DataFrame(df_rows, columns=columns)

def create_cache_simulator(organization, replacement, address_list, cache_size_bytes=128, line_size=16):
    """Run ``simulate_cache`` with the organization and policy listed first.

    Thin convenience wrapper: every argument is forwarded unchanged to
    ``simulate_cache`` and its return value is handed back as-is.
    """
    return simulate_cache(
        addresses=address_list,
        organization=organization,
        replacement=replacement,
        cache_size_bytes=cache_size_bytes,
        line_size=line_size,
    )

# Example access trace; the later demo cells reuse this same list.
addresses = [
    f"LOAD {hex_addr}"
    for hex_addr in ("0x110C", "0x1010", "0x1114", "0x1210", "0x1110")
]

# 128 bytes / 16-byte lines = 8 lines; 2-way set associative -> 4 sets.
create_cache_simulator(
    'set-2',
    'LRU',
    addresses,
    cache_size_bytes=128,
    line_size=16,
)



Instruction 1: LOAD 0x110C
Hex Address: 0x110C
Binary Address:  0001 0001 0000 1100
Tag: 001000100 | Index: 00 | Offset: 1100
Result: Miss ❌ — Placed in Set 0, Line 0

Instruction 2: LOAD 0x1010
Hex Address: 0x1010
Binary Address:  0001 0000 0001 0000
Tag: 001000000 | Index: 01 | Offset: 0000
Result: Miss ❌ — Placed in Set 1, Line 0

Instruction 3: LOAD 0x1114
Hex Address: 0x1114
Binary Address:  0001 0001 0001 0100
Tag: 001000100 | Index: 01 | Offset: 0100
Result: Miss ❌ — Placed in Set 1, Line 1

Instruction 4: LOAD 0x1210
Hex Address: 0x1210
Binary Address:  0001 0010 0001 0000
Tag: 001001000 | Index: 01 | Offset: 0000
Result: Miss ❌ — Placed in Set 1, Line 0

Instruction 5: LOAD 0x1110
Hex Address: 0x1110
Binary Address:  0001 0001 0001 0000
Tag: 001000100 | Index: 01 | Offset: 0000
Result: Hit ✅ — Placed in Set 1, Line 1


Unnamed: 0,Iteration,Hex Address,Binary,Tag,Index,Offset,Location,Hit/Miss
0,1,0x110C,0001 0001 0000 1100,1000100,0,1100,"Set 0, Line 0",Miss
1,2,0x1010,0001 0000 0001 0000,1000000,1,0,"Set 1, Line 0",Miss
2,3,0x1114,0001 0001 0001 0100,1000100,1,100,"Set 1, Line 1",Miss
3,4,0x1210,0001 0010 0001 0000,1001000,1,0,"Set 1, Line 0",Miss
4,5,0x1110,0001 0001 0001 0000,1000100,1,0,"Set 1, Line 1",Hit


In [15]:
# Same trace on a direct-mapped cache (128 bytes / 16-byte lines = 8 lines).
# The replacement argument is required by the signature, but the
# direct-mapped path never consults it.
create_cache_simulator(
    'direct',
    'LRU',
    addresses,
    cache_size_bytes=128,
    line_size=16,
)


Instruction 1: LOAD 0x110C
Hex Address: 0x110C
Binary Address:  0001 0001 0000 1100
Tag: 00100010 | Index: 000 | Offset: 1100
Result: Miss ❌ — Placed in Set 0, Line 0

Instruction 2: LOAD 0x1010
Hex Address: 0x1010
Binary Address:  0001 0000 0001 0000
Tag: 00100000 | Index: 001 | Offset: 0000
Result: Miss ❌ — Placed in Set 1, Line 0

Instruction 3: LOAD 0x1114
Hex Address: 0x1114
Binary Address:  0001 0001 0001 0100
Tag: 00100010 | Index: 001 | Offset: 0100
Result: Miss ❌ — Placed in Set 1, Line 0

Instruction 4: LOAD 0x1210
Hex Address: 0x1210
Binary Address:  0001 0010 0001 0000
Tag: 00100100 | Index: 001 | Offset: 0000
Result: Miss ❌ — Placed in Set 1, Line 0

Instruction 5: LOAD 0x1110
Hex Address: 0x1110
Binary Address:  0001 0001 0001 0000
Tag: 00100010 | Index: 001 | Offset: 0000
Result: Miss ❌ — Placed in Set 1, Line 0


Unnamed: 0,Iteration,Hex Address,Binary,Tag,Index,Offset,Location,Hit/Miss
0,1,0x110C,0001 0001 0000 1100,100010,0,1100,"Set 0, Line 0",Miss
1,2,0x1010,0001 0000 0001 0000,100000,1,0,"Set 1, Line 0",Miss
2,3,0x1114,0001 0001 0001 0100,100010,1,100,"Set 1, Line 0",Miss
3,4,0x1210,0001 0010 0001 0000,100100,1,0,"Set 1, Line 0",Miss
4,5,0x1110,0001 0001 0001 0000,100010,1,0,"Set 1, Line 0",Miss


In [16]:
# Same trace on a fully associative cache: all 8 lines form a single set,
# so addresses carry no index bits and LRU decides evictions.
create_cache_simulator(
    'fully',
    'LRU',
    addresses,
    cache_size_bytes=128,
    line_size=16,
)


Instruction 1: LOAD 0x110C
Hex Address: 0x110C
Binary Address:  0001 0001 0000 1100
Tag: 00100010000 | Index: - | Offset: 1100
Result: Miss ❌ — Placed in Line 0

Instruction 2: LOAD 0x1010
Hex Address: 0x1010
Binary Address:  0001 0000 0001 0000
Tag: 00100000001 | Index: - | Offset: 0000
Result: Miss ❌ — Placed in Line 1

Instruction 3: LOAD 0x1114
Hex Address: 0x1114
Binary Address:  0001 0001 0001 0100
Tag: 00100010001 | Index: - | Offset: 0100
Result: Miss ❌ — Placed in Line 2

Instruction 4: LOAD 0x1210
Hex Address: 0x1210
Binary Address:  0001 0010 0001 0000
Tag: 00100100001 | Index: - | Offset: 0000
Result: Miss ❌ — Placed in Line 3

Instruction 5: LOAD 0x1110
Hex Address: 0x1110
Binary Address:  0001 0001 0001 0000
Tag: 00100010001 | Index: - | Offset: 0000
Result: Hit ✅ — Placed in Line 2


Unnamed: 0,Iteration,Hex Address,Binary,Tag,Index,Offset,Location,Hit/Miss
0,1,0x110C,0001 0001 0000 1100,100010000,,1100,Line 0,Miss
1,2,0x1010,0001 0000 0001 0000,100000001,,0,Line 1,Miss
2,3,0x1114,0001 0001 0001 0100,100010001,,100,Line 2,Miss
3,4,0x1210,0001 0010 0001 0000,100100001,,0,Line 3,Miss
4,5,0x1110,0001 0001 0001 0000,100010001,,0,Line 2,Hit
