In [38]:
import numpy as np

def softmax(x, axis=None):
    """
    Compute a numerically stable softmax of x.

    Parameters
    ----------
    x : array_like
        Input scores. May be a vector or an array of any rank.
    axis : int or None, optional
        Axis along which the softmax is normalized. With the default
        ``None`` the maximum and the sum are taken over the whole array,
        which for a 1-D vector is the usual softmax. Pass ``axis=1`` to
        normalize each row of a 2-D score matrix independently.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` whose entries sum to 1 along
        ``axis`` (or over the whole array when ``axis`` is ``None``).
    """
    x = np.asarray(x)
    # Subtracting the maximum leaves the result unchanged mathematically
    # but keeps np.exp from overflowing on large scores.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    # keepdims=True lets the per-axis sums broadcast back against exp_x.
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

import numpy as np

# Shared configuration constants for this notebook.
# These are example values; adjust them as necessary.
DMODEL = 16  # Model dimension
N = 16       # Number of tokens
BITWIDTH = 4 # Bit-width for data_t
BITWIDTH3 = 12 # Bit-width for data3_t

In [39]:
import re
import numpy as np


def _parse_matrix(body):
    """Convert the text captured between "{{" and "}}" into a 2-D integer array."""
    rows = []
    for line in body.split("\n"):
        # Strip whitespace before the commas so a line ending in "}, "
        # (comma followed by a space) does not leave an empty last token.
        line = line.strip().strip(",")
        if not line:
            continue
        # Inner row braces are still attached to the first/last token.
        rows.append([int(token.strip().strip("{}")) for token in line.split(",")])
    return np.array(rows)


def read_weights(file_path):
    """
    Read the Q, K and V weight matrices from a text file.

    The file is scanned for brace-delimited blocks of the form
    ``{{ ... }}``. Inside a block every non-empty line is parsed as one
    row of comma-separated integers. The first three blocks found are
    returned, in file order, as Q_W, K_W and V_W.

    Parameters
    ----------
    file_path : str
        Path of the file to read.

    Returns
    -------
    tuple of numpy.ndarray
        ``(Q_W, K_W, V_W)``.

    Raises
    ------
    IndexError
        If the file contains fewer than three ``{{ ... }}`` blocks.
    ValueError
        If a token inside a block is not an integer.
    """
    with open(file_path, "r") as file:
        content = file.read()

    # Regular expression to find matrices: non-greedy, so each match stops
    # at the first "}}" and DOTALL lets a matrix span several lines.
    matrices = re.findall(r"{\{(.*?)\}\}", content, re.DOTALL)
    if len(matrices) < 3:
        raise IndexError(
            f"expected at least 3 matrices in {file_path!r}, found {len(matrices)}"
        )

    Q_W, K_W, V_W = (_parse_matrix(body) for body in matrices[:3])
    return Q_W, K_W, V_W


# Example usage: the file name is built from DMODEL (e.g. "weights16.h"
# when DMODEL is 16), then the three weight matrices are loaded into the
# module-level names Q_W, K_W and V_W.
file_path = f"weights{DMODEL}.h"  # Replace with the actual file path
Q_W, K_W, V_W = read_weights(file_path)


In [68]:
import numpy as np

# Assuming DMODEL, N, Q_W, K_W, V_W are defined

# Step 1: Generate random tokens with values in [0, 15).
# The tokens are NOT normalized here. A fixed seed keeps the cell
# reproducible under Restart Kernel -> Run All.
rng = np.random.default_rng(0)
tokens = rng.random((N, DMODEL)) * 15

# Scale each row of the Q, K, V weight matrices to unit L2 norm
Q_W_norm = Q_W / np.linalg.norm(Q_W, axis=1, keepdims=True)
K_W_norm = K_W / np.linalg.norm(K_W, axis=1, keepdims=True)
V_W_norm = V_W / np.linalg.norm(V_W, axis=1, keepdims=True)

# Step 2: Apply QKV transformations
Q = tokens @ Q_W_norm
K = tokens @ K_W_norm
V = tokens @ V_W_norm

# Step 3: Compute scaled dot-product attention with a row-wise softmax.
# The row maximum is subtracted before exponentiating for numerical stability.
# NOTE(review): the recorded output of this cell shows every output row
# identical, which is consistent with the softmax saturating on very large
# scores -- TODO confirm whether the tokens should be scaled before projection.
K_T = K.T
attention_scores = Q @ K_T / np.sqrt(DMODEL)
attention_weights = np.exp(attention_scores - np.max(attention_scores, axis=1, keepdims=True))
attention_weights /= np.sum(attention_weights, axis=1, keepdims=True)
attention_output = attention_weights @ V

# Step 4: Min-max rescale the output into integer levels 0-15.
# A constant output has zero span; map it to level 0 instead of dividing by zero.
out_min = np.min(attention_output)
out_span = np.max(attention_output) - out_min
if out_span == 0:
    output_tokens = np.zeros(attention_output.shape, dtype=int)
else:
    output_tokens = np.clip(np.round((attention_output - out_min) / out_span * 15), 0, 15).astype(int)

# Output tokens
print(output_tokens)


[[13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]
 [13 13  7  3 12 13 15  6 15  8  8  1  0  9  2  8]]


Q_W: [[ 4 14 14 ... 11 13 12]
 [10 13 10 ... 13 14  7]
 [10 10  0 ... 11  8  9]
 ...
 [13 15  7 ... 14 13  4]
 [14 10  2 ...  8  4  6]
 [12 14  7 ...  4  2  5]]
K_W: [[ 8  4  9 ...  6 12  8]
 [ 7 13  4 ...  6  5  4]
 [ 2  9  0 ... 15 11 13]
 ...
 [ 9  2  6 ...  4 15  5]
 [ 2 11 15 ...  0 11  2]
 [ 0  6  7 ...  4  8 13]]
V_W: [[ 8 15 11 ...  9  7 14]
 [ 9 13 11 ...  0 15 15]
 [10  5  9 ...  6  7  9]
 ...
 [15 10  5 ...  9  5  4]
 [ 8 10  1 ...  4  8  8]
 [13  8 15 ...  2  5  4]]


In [69]:
import numpy as np

# Example values for demonstration
DMODEL = 16  # Model dimension
N = 16       # Number of tokens

# Fixed seed so the diagnostic values at the end of the cell are reproducible.
rng = np.random.default_rng(0)

# Generate random weights for Q, K, V for demonstration purposes.
# NOTE: this rebinds Q_W, K_W and V_W, replacing whatever was previously
# stored under those names.
Q_W = rng.random((DMODEL, DMODEL))
K_W = rng.random((DMODEL, DMODEL))
V_W = rng.random((DMODEL, DMODEL))

# Generate random tokens with values in [0, 15) (they are not normalized)
tokens = rng.random((N, DMODEL)) * 15

# Scale each row of the Q, K, V weight matrices to unit L2 norm
Q_W_norm = Q_W / np.linalg.norm(Q_W, axis=1, keepdims=True)
K_W_norm = K_W / np.linalg.norm(K_W, axis=1, keepdims=True)
V_W_norm = V_W / np.linalg.norm(V_W, axis=1, keepdims=True)

# Apply QKV transformations
Q = tokens @ Q_W_norm
K = tokens @ K_W_norm
V = tokens @ V_W_norm

# Compute scaled dot-product attention with a row-wise softmax.
# The row maximum is subtracted before exponentiating for numerical stability.
# NOTE(review): the recorded diagnostic shows scores in the thousands and
# identical output rows, which is consistent with a saturated softmax --
# TODO confirm whether the tokens should be scaled before projection.
K_T = K.T
attention_scores = Q @ K_T / np.sqrt(DMODEL)
attention_weights = np.exp(attention_scores - np.max(attention_scores, axis=1, keepdims=True))
attention_weights /= np.sum(attention_weights, axis=1, keepdims=True)
attention_output = attention_weights @ V

# Min-max rescale the output into integer levels 0-15.
# A constant output has zero span; map it to level 0 instead of dividing by zero.
out_min = np.min(attention_output)
out_span = np.max(attention_output) - out_min
if out_span == 0:
    output_tokens = np.zeros(attention_output.shape, dtype=int)
else:
    output_tokens = np.clip(np.round((attention_output - out_min) / out_span * 15), 0, 15).astype(int)

# Diagnostics: the top-left 2x2 corner of the scores and of the output tokens
attention_scores_diagnostic = attention_scores[:2, :2]
output_tokens_diagnostic = output_tokens[:2, :2]

# Bare last expression so the notebook displays both diagnostics
attention_scores_diagnostic, output_tokens_diagnostic


(array([[2176.47359756, 2217.60515701],
        [2292.46851177, 2339.67972877]]),
 array([[3, 9],
        [3, 9]]))