In [31]:
import math
import numpy
def binary32_to_vector(binary_str, signed=False):
    """
    Convert a 32-bit binary string into a list of 8 decimal numbers
    either as signed 4-bit 2's complement or unsigned 4-bit.

    Parameters:
        binary_str (str): 32-bit binary string
        signed (bool): If True, interpret as signed 4-bit numbers; else unsigned

    Returns:
        List[int]: list of 8 decimal numbers
    """
    if len(binary_str) != 32:
        raise ValueError("Input binary string must be exactly 32 bits")

    vector = []
    for i in range(0, 32, 4):
        chunk = binary_str[i:i+4]
        value = int(chunk, 2)
        if signed:
            # If signed and MSB is 1, compute 2's complement negative value
            if value & 0b1000:
                value -= 16
        vector.append(value)
    return vector

In [32]:
binary_w_list = [
    "00000000000000000000000000000000",
    "00000000000000000000000000000001",
    "00000000000000000000000000000000",
    "00010000001000000000000000000000",
    "00000000000100000000000000000000",
    "00010000000000000000000000000000",
    "00010000000000000000000000000000",
    "00100001000100000000000000010000",
    "00100000001000010000000000000000"]
dec_a_list = []
for b in range(0,9):
    dec_a_list.append(binary32_to_vector(binary_w_list[b]))

In [33]:
dec_a_list

[[0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 2, 0, 0, 0, 0, 0],
 [0, 0, 1, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0],
 [2, 1, 1, 0, 0, 0, 1, 0],
 [2, 0, 2, 1, 0, 0, 0, 0]]

In [34]:
signed_binary_list = [
     "10011001011101111001011110010111",
    "10011001100101110111011110010111",
    "01110111100101110111011110011001",
    "10010111100101110111100110010111",
    "01111001011101110111011101110111",
    "01111001100101110111011110011001",
    "10010111100101111001100110011001",
    "10010111100110011001100101110111",
    "01111001011110010111100101110111"]
dec_w_list = []
for b in range(0,9):
    dec_w_list.append(binary32_to_vector(signed_binary_list[b],True))

In [35]:
dec_w_list

[[-7, -7, 7, 7, -7, 7, -7, 7],
 [-7, -7, -7, 7, 7, 7, -7, 7],
 [7, 7, -7, 7, 7, 7, -7, -7],
 [-7, 7, -7, 7, 7, -7, -7, 7],
 [7, -7, 7, 7, 7, 7, 7, 7],
 [7, -7, -7, 7, 7, 7, -7, -7],
 [-7, 7, -7, 7, -7, -7, -7, -7],
 [-7, 7, -7, -7, -7, -7, 7, 7],
 [7, -7, 7, -7, 7, -7, 7, 7]]

In [36]:
sum1 = []
for b in range(0,9):
    sum1.append(numpy.array(dec_a_list[b])*numpy.array(dec_w_list[b]))

In [37]:
sum_np = numpy.array(sum1)
sum_np

array([[  0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   7],
       [  0,   0,   0,   0,   0,   0,   0,   0],
       [ -7,   0, -14,   0,   0,   0,   0,   0],
       [  0,   0,   7,   0,   0,   0,   0,   0],
       [  7,   0,   0,   0,   0,   0,   0,   0],
       [ -7,   0,   0,   0,   0,   0,   0,   0],
       [-14,   7,  -7,   0,   0,   0,   7,   0],
       [ 14,   0,  14,  -7,   0,   0,   0,   0]])

In [38]:
f_sum = numpy.sum(sum_np,axis=1)
f_sum

array([  0,   7,   0, -21,   7,   7,  -7,  -7,  21])

In [39]:
def to_hex_array(int_array):
    """
    Convert a NumPy array of integers to a NumPy array of 4-digit hex strings
    using 16-bit representation (handles signed integers with 2's complement).

    Parameters:
        int_array (np.ndarray): NumPy array of integers (signed or unsigned)

    Returns:
        np.ndarray: NumPy array of 4-digit hexadecimal strings
    """
    return numpy.vectorize(lambda x: format(x & 0xFFFF, '04X'))(int_array)

In [40]:
to_hex_array(f_sum)

array(['0000', '0007', '0000', 'FFEB', '0007', '0007', 'FFF9', 'FFF9',
       '0015'], dtype='<U4')

In [41]:
f_sum.sum()

7

In [42]:
def recursive_partial_sums_hex(arr, index=0, current_sum=0, result=None):
    """
    Recursively compute partial sums of an array and return each sum as 16-bit hex.

    Parameters:
        arr (list or np.ndarray): Array of integers
        index (int): Current index for recursion (used internally)
        current_sum (int): Accumulated sum so far (used internally)
        result (list): List of partial sums (used internally)

    Returns:
        np.ndarray: Array of 4-digit hexadecimal strings for each partial sum
    """
    if result is None:
        result = []

    if index >= len(arr):
        return numpy.array(result)

    # Add current element
    current_sum += arr[index]

    # Convert to 16-bit hex string
    hex_value = format(current_sum & 0xFFFF, '04X')
    result.append(hex_value)

    # Recursive call for next index
    return recursive_partial_sums_hex(arr, index + 1, current_sum, result)

In [43]:
def to_hex_array_1(int_array):
    """
    Convert a NumPy array of integers to a NumPy array of 4-digit hex strings
    using 16-bit representation (handles signed integers with 2's complement).

    Parameters:
        int_array (np.ndarray): NumPy array of integers (signed or unsigned)

    Returns:
        np.ndarray: NumPy array of 4-digit hexadecimal strings
    """
    return numpy.vectorize(lambda x: format(x & 0xF, '04X'))(int_array)

In [44]:
hex_res = recursive_partial_sums_hex(f_sum)

In [45]:
hex_res

array(['0000', '0007', '0007', 'FFF2', 'FFF9', '0000', 'FFF9', 'FFF2',
       '0007'], dtype='<U4')

In [46]:
recursive_partial_sums_hex([0])

array(['0000'], dtype='<U4')

In [47]:
'''
code used to check jesse's calculation with the formulaic calculation 
'''
def hex_signed16_to_int(h):
    x = int(h, 16)
    if x & 0x8000:   # negative in 16-bit
        x -= 0x10000
    return x

hex_to_int_array = [hex_signed16_to_int(h) for h in hex_res]
print("jesse calculated value: ", hex_to_int_array)


def binary32_to_vector(bits, signed=False):
    vals = []
    for i in range(0, 32, 4):
        x = int(bits[i:i+4], 2)
        if signed and (x & 0b1000):
            x -= 16
        vals.append(x)
    return vals

act_words_manual = binary_w_list          # 9 strings: activations for k=0..8
wgt_words_manual = signed_binary_list     # 9 strings: weights for k=0..8

partials = []      # psum for each kernel tap k
combined = []      # running sums over k
running = 0

for k in range(9):
    a = binary32_to_vector(act_words_manual[k], signed=False)
    w = binary32_to_vector(wgt_words_manual[k], signed=True)

    # elementwise MAC across 8 lanes
    p = sum(ai * wi for ai, wi in zip(a, w))
    partials.append(p)

    running += p
    combined.append(running)

final_val = max(combined[-1], 0)   # ReLU on final combined psum

print("formula running combined psums:", combined)
print("formula final output (after ReLU):", final_val)



jesse calculated value:  [0, 7, 7, -14, -7, 0, -7, -14, 7]
formula running combined psums: [0, 7, 7, -14, -7, 0, -7, -14, 7]
formula final output (after ReLU): 7


In [48]:
'''
general code used to calculate over different weight kijs. prints activation and
outputs over different NIJs (0->15)
'''
import numpy as np
nij_count=36;
# ---------- helper to unpack one 32-bit word into eight 4-bit values ----------
def binary32_to_vector(bits, signed=False):
    vals = []
    for i in range(0, 32, 4):
        x = int(bits[i:i+4], 2)
        if signed and (x & 0b1000):  # 4-bit sign bit
            x -= 16                 # convert from unsigned to signed
        vals.append(x)
    return vals


# ---------- load activations ----------
with open("activation.txt") as f:
    activation_lines = [l.strip() for l in f]   # one line per NIJ

# ---------- print activation.txt ----------
print("======= ACTIVATION VALUES (decoded per NIJ) =======")
for nij in range(nij_count):
    a = binary32_to_vector(activation_lines[nij], signed=False)
    print(f"NIJ {nij:2d}: {a}")
print("===================================================\n")

# ---------- load weights from weight_kij/ ----------
weight_lines = []   # weight_lines[k][cout]
for k in range(9):
    fname = f"weight_kij/weight_k{k}.txt"
    with open(fname) as f:
        weight_lines.append([l.strip() for l in f])

nij_count  = len(activation_lines)   # number of NIJs (spatial positions)
cout_count = len(weight_lines[0])    # number of output channels (COUT)
print(weight_lines[0])

# psums[nij, cout]        = combined psum BEFORE ReLU
# golden_outputs[nij,cout]= final output AFTER ReLU
psums = np.zeros((nij_count,9, cout_count), dtype=int)
golden_outputs = np.zeros((nij_count, cout_count), dtype=int)

for nij in range(nij_count):
    act_word = activation_lines[nij]
    a = np.array(binary32_to_vector(act_word, signed=False), dtype=int)  # shape (8,)

    for  k in range(9):
        running = 0
        psum = np.zeros(cout_count, dtype=int)
        for cout in range(cout_count):
            w = np.array(
                binary32_to_vector(weight_lines[k][cout], signed=True),
                dtype=int
            )
            running = int(np.dot(a, w))
            psum[cout] = running  
        # BEFORE ReLU
        psums[nij][k] = psum
        golden_outputs[nij, cout] = max(running, 0)  # AFTER ReLU

print("psums shape (NIJ x COUT):", psums.shape)
for k in range(9):
    for nij in range(nij_count):
        row = " ".join(f"{v:6d}" for v in psums[nij][k])
        print(f"NIJ {nij:2d} psums:   {row}")

nij_kernal = [0,1,2,3,6,7,8,9,12,13,14,15,18,19,20,21];
out = np.zeros((16,8),int)
for kernal in range(16):
    nij = nij_kernal[kernal];
    for k in range(9):
        out[kernal] +=  psums[nij][k]
        if((k==2)or(k==5)):
            nij += 4
        else:
            nij += 1;
                
for kernal in range(16):
    out[kernal] = np.vectorize(max)(out[kernal], 0)

print("\nOutputs (after ReLU):")
for k in range(16):
    row = " ".join(f"{v:6d}" for v in out[k])
    print(f"NIJ {nij:2d} out:     {row}")

print("\nOutputs for each NIJ in hex (after ReLU):")
for nij in range(16):
    hex_row = " ".join(f"{v & 0xFFFF:04x}" for v in out[nij])
    print(f"NIJ {nij:2d} hex:     {hex_row}")

print("\nOutputs in binary:")
for nij in range(16):
    row = " ".join(f"{v & 0xFFFF:016b}" for v in out[nij])
    print(f"NIJ {nij:2d} bin:     {row}")


NIJ  0: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ  1: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ  2: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ  3: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ  4: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ  5: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ  6: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ  7: [4, 4, 0, 0, 0, 3, 2, 2]
NIJ  8: [0, 0, 0, 0, 2, 1, 0, 4]
NIJ  9: [0, 0, 0, 0, 0, 0, 0, 2]
NIJ 10: [0, 3, 1, 0, 2, 0, 0, 4]
NIJ 11: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ 12: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ 13: [0, 0, 2, 0, 0, 0, 0, 5]
NIJ 14: [0, 1, 3, 0, 0, 0, 0, 5]
NIJ 15: [6, 0, 14, 0, 0, 0, 0, 0]
NIJ 16: [0, 0, 10, 0, 3, 0, 0, 0]
NIJ 17: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ 18: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ 19: [5, 0, 0, 0, 0, 0, 0, 0]
NIJ 20: [15, 0, 8, 0, 0, 0, 0, 0]
NIJ 21: [15, 0, 15, 0, 0, 0, 0, 0]
NIJ 22: [15, 2, 15, 0, 0, 0, 0, 0]
NIJ 23: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ 24: [0, 0, 0, 0, 0, 0, 0, 0]
NIJ 25: [9, 3, 1, 2, 0, 0, 3, 0]
NIJ 26: [13, 5, 5, 3, 0, 0, 4, 0]
NIJ 27: [15, 2, 13, 7, 0, 0, 1, 0]
NIJ 28: [10, 2, 8, 2, 0, 1, 2, 0]
NIJ 29: [0, 0, 0, 0, 0, 0, 0, 0]

In [49]:
with open("output.txt", "w") as f:
    for nij in range(16):
        row = " ".join(f"{v & 0xFFFF:016b}" for v in out[nij])
        f.write(row + "\n")
           

TypeError: unsupported format string passed to numpy.ndarray.__format__