In [11]:
# --- index mapping calculation function ---
# This function maps a coefficient index to a memory bank and offset
# based on the URAM word it is stored in, under a 3-coeff → 2-word packing scheme.
# It is used to determine how coefficients are stored in the memory banks
# of a hardware accelerator, such as an FPGA.
# Intended for BConv.
def idx_to_physical_bank(idx):
    """
    Locate the URAM word attributed to coefficient ``idx``.

    Coefficients are grouped into blocks of 32. Inside a block, every run of
    three consecutive coefficients shares a pair of words: the first two
    coefficients of the run are attributed to the first word of the pair and
    the third coefficient to the second word. Words are numbered
    consecutively across blocks and striped over 22 banks.

    With these constants a block spans exactly 22 words, so for a
    non-negative integer ``idx`` the returned offset equals the block number
    and the returned bank equals the word position inside the block. Block
    positions 30 and 31 both resolve to word 20, so bank 21 is never returned.

    NOTE(review): under a 3-coeff → 2-word packing the middle coefficient
    presumably straddles both words of its pair; only the first word is
    reported here — confirm against the packing format.

    Args:
        idx: Coefficient index (expected to be a non-negative integer).

    Returns:
        Tuple ``(bank, offset)``: the bank holding the word and the word's
        offset inside that bank.
    """
    bank_count = 22
    coeffs_per_block = 32

    # Two words per (possibly partial) triple of coefficients: 11 triples -> 22 words.
    words_in_block = 2 * ((coeffs_per_block + 2) // 3)

    # Split the index into its block and its position inside that block,
    # then split the position into a triple number and a slot (0, 1 or 2).
    block_no, pos_in_block = divmod(idx, coeffs_per_block)
    triple, slot = divmod(pos_in_block, 3)

    # Slots 0 and 1 resolve to the first word of the pair, anything else to the second.
    word_in_block = 2 * triple + (0 if slot in (0, 1) else 1)

    # Consecutive words go to consecutive banks; the offset advances once per full sweep.
    flat_word = block_no * words_in_block + word_in_block
    offset, bank = divmod(flat_word, bank_count)
    return bank, offset

In [12]:
# Index 32 is the first coefficient of block 1, so it lands in bank 0 at offset 1.
bank, offset = idx_to_physical_bank(32)
print(f"Bank: {bank}, Offset: {offset}")

Bank: 0, Offset: 1


In [14]:
# Assuming one data item per bank (logical bank)
# Coefficient-based bank calculation
# 
def idx_to_logical_bank(idx):
    """
    Split a coefficient index into a logical ``(bank, offset)`` pair.

    The layout has 32 banks (0 to 31): the five low bits of ``idx`` select
    the bank and the remaining high bits form the offset (row). This is the
    same as ``idx % 32`` and ``idx // 32`` for integers.

    The offset is not masked. It stays within 11 bits (max 2047) only when
    ``idx`` is below 65536, i.e. fits in 16 bits.

    Args:
        idx: Integer coefficient index.

    Returns:
        Tuple ``(bank, offset)``.
    """
    bank_bits = 5                      # log2 of the 32 banks
    bank_mask = (1 << bank_bits) - 1   # 0b11111 keeps bits [4:0]

    return idx & bank_mask, idx >> bank_bits

In [None]:
# Checks whether two different coefficients are mapped to the same physical memory location — 
# i.e., same (bank, offset).

# A conflict means:

# You cannot read both coefficients at the same time (same cycle),
# Because URAM banks are typically single-port or limited dual-port,
# And you can't read two words from the same bank at the same offset simultaneously.
# So this function helps detect memory bank conflicts in your mapping.
def verify_no_conflicts():
    """
    Scan the first 10 blocks (320 coefficient indices) for shared locations.

    Every index is mapped through ``idx_to_physical_bank``. Whenever the
    resulting ``(bank, offset)`` pair was already produced by an earlier
    index, one warning line is printed naming the current index and the most
    recent earlier index that produced the same pair.

    Nothing is returned; the printed lines are the only result.
    """
    total_coeffs = 32 * 10  # first 10 blocks
    latest_idx_at = {}      # (bank, offset) -> most recent idx mapped there

    for idx in range(total_coeffs):
        bank, offset = idx_to_physical_bank(idx)
        location = (bank, offset)
        previous = latest_idx_at.get(location)
        if previous is not None:
            print(f"⚠️ Conflict: idx {idx} and {previous} both map to (bank={bank}, offset={offset})")
        # Always remember the newest index so later reports name the closest neighbour.
        latest_idx_at[location] = idx

In [17]:
# Run the scan; each printed line names two indices that resolve to the same (bank, offset).
verify_no_conflicts()

⚠️ Conflict: idx 1 and 0 both map to (bank=0, offset=0)
⚠️ Conflict: idx 4 and 3 both map to (bank=2, offset=0)
⚠️ Conflict: idx 7 and 6 both map to (bank=4, offset=0)
⚠️ Conflict: idx 10 and 9 both map to (bank=6, offset=0)
⚠️ Conflict: idx 13 and 12 both map to (bank=8, offset=0)
⚠️ Conflict: idx 16 and 15 both map to (bank=10, offset=0)
⚠️ Conflict: idx 19 and 18 both map to (bank=12, offset=0)
⚠️ Conflict: idx 22 and 21 both map to (bank=14, offset=0)
⚠️ Conflict: idx 25 and 24 both map to (bank=16, offset=0)
⚠️ Conflict: idx 28 and 27 both map to (bank=18, offset=0)
⚠️ Conflict: idx 31 and 30 both map to (bank=20, offset=0)
⚠️ Conflict: idx 33 and 32 both map to (bank=0, offset=1)
⚠️ Conflict: idx 36 and 35 both map to (bank=2, offset=1)
⚠️ Conflict: idx 39 and 38 both map to (bank=4, offset=1)
⚠️ Conflict: idx 42 and 41 both map to (bank=6, offset=1)
⚠️ Conflict: idx 45 and 44 both map to (bank=8, offset=1)
⚠️ Conflict: idx 48 and 47 both map to (bank=10, offset=1)
⚠️ Conflict: i

We don’t care if two coeffs are in the same bank, as long as they are at different offsets — because we read one offset at a time (offset-wise) for BConv.

🧠 Controller Logic (Pseudocode)

```python
for base_idx in range(0, total_coeffs, 8):
    # Parallel read: 8 cores read 8 coeffs
    requests = []
    for core_id in range(8):
        idx = base_idx + core_id
        bank, offset = idx_to_logical_bank(idx)
        requests.append((core_id, bank, offset))

    # Issue all 8 reads in parallel
    data = parallel_read([(bank, offset) for _, bank, offset in requests])  # One per bank

    # Distribute to cores
    for core_id in range(8):
        cores[core_id].load(data[core_id])  # Or unpack if packed

    # All cores compute in parallel
    for core in cores:
        core.compute()
```

In [20]:
def get_idx(core_id, step, num_cores=8):
    """
    Returns the coefficient index accessed by core_id at given step.

    Cores walk the coefficients in an interleaved fashion: at every step the
    cores together cover ``num_cores`` consecutive indices, and core
    ``core_id`` takes the ``core_id``-th of them.

    Args:
        core_id: Position of the core within the group (0-based).
        step: Access step number (0-based).
        num_cores: Number of cores reading in parallel, i.e. the stride
            between two consecutive accesses of the same core. Defaults to 8.

    Returns:
        The coefficient index ``num_cores * step + core_id``.
    """
    return num_cores * step + core_id

# Tabulate the coefficient index each of the 8 cores touches over the first 6 steps
print("Core | Step 0 | Step 1 | Step 2 | Step 3 | Step 4 | Step 5")
print("-----|--------|--------|--------|--------|--------|--------")
for core_id in range(8):
    values = [get_idx(core_id, step) for step in range(6)]
    # Right-align every index in a 6-character column so rows line up with the header.
    print(f" {core_id}   | " + " | ".join(f"{v:6}" for v in values))

Core | Step 0 | Step 1 | Step 2 | Step 3 | Step 4 | Step 5
-----|--------|--------|--------|--------|--------|--------
 0   |      0 |      8 |     16 |     24 |     32 |     40
 1   |      1 |      9 |     17 |     25 |     33 |     41
 2   |      2 |     10 |     18 |     26 |     34 |     42
 3   |      3 |     11 |     19 |     27 |     35 |     43
 4   |      4 |     12 |     20 |     28 |     36 |     44
 5   |      5 |     13 |     21 |     29 |     37 |     45
 6   |      6 |     14 |     22 |     30 |     38 |     46
 7   |      7 |     15 |     23 |     31 |     39 |     47
