In [149]:
from typing import Type
import pyrtl
from pyrtl import *
import numpy as np
import onnx
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from enum import IntEnum
from hardware_accelerators.nn import load_model, MLP
from hardware_accelerators.dtypes import *
from hardware_accelerators.rtllib import *
from hardware_accelerators.simulation import AcceleratorSimulator
from hardware_accelerators.simulation.matrix_utils import *
from hardware_accelerators.simulation.buffer import WeightFIFOSimulator

# New Accelerator Sim Testing


## GEMM


In [None]:
# GEMM smoke test: stream three activation rows through a 3x3 array and
# compare the ReLU'd result against a numpy reference.
simulator = AcceleratorSimulator.default_config(array_size=3, num_weight_tiles=2)

simulator.setup()

weights = np.ones((3, 3))
activations = np.array([[1, 2, 3], [-4, -5, -6], [7, 8, 9]])

# Store the weight matrix in tile slot 0.
simulator.load_weights(weights, 0)

# One instruction per activation row. The first call also requests the weight
# load and does not pass accum_addr (simulator default -- presumably 0, confirm);
# rows 1 and 2 target accumulator addresses 1 and 2.
simulator.execute_instruction(
    data_vec=activations[0],
    load_new_weights=True,
    flush_pipeline=False,
    activation_enable=True,
    activation_func="relu",
)
simulator.execute_instruction(
    data_vec=activations[1],
    accum_addr=1,
    flush_pipeline=False,
    activation_enable=True,
    activation_func="relu",
)
# Only the final row asks for a pipeline flush.
simulator.execute_instruction(
    data_vec=activations[2],
    accum_addr=2,
    activation_enable=True,
    activation_func="relu",
    flush_pipeline=True,
)

results = np.zeros((activations.shape[0], weights.shape[1]))

# Read one output row per step, issuing a NOP to advance the simulation.
# NOTE(review): _get_outputs is a private helper of the simulator.
for i in range(3):
    results[i] = simulator._get_outputs()
    simulator.execute_instruction(nop=True)

# numpy reference: ReLU(activations @ weights)
gt = np.maximum(0, (activations @ weights))

assert np.isclose(results, gt).all()

# Displays every recorded simulation step (long output).
simulator.history

[
 Simulation Step 0
 Input Signals:
 --------------------------------------------------------------------------------
   data_enable: 0
   data_inputs: None
   weight_start: 1
   weight_tile_addr: 0
   accum_addr: 0
   accum_mode: 0
   act_start: 0
   act_func: 0
 
 Systolic Array State:
 --------------------------------------------------------------------------------
 Inputs:
   w_en: 0
   enable: 0
   weights: [0. 0. 0.]
   data: [0. 0. 0.]
 
 Weights Matrix:
 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]
 
 Data Matrix:
 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]
 
 Accumulators:
 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]
 
 Control Registers:
 data_controls: [0, [0, 0]]
 accum_controls: [0, 0, 0]
 control_out: 0
 
 Outputs:
 [0. 0. 0.]
 ----------------------------------------
 
 
 Accumulator State:
 --------------------------------------------------------------------------------
 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]
 
 Outputs

## GEMV


In [None]:
# GEMV smoke test: a single activation vector against one 3x3 weight tile.
simulator = AcceleratorSimulator.default_config(array_size=3, num_weight_tiles=2)
simulator.setup()

# After the transpose every row of w is [1, 2, 3] (see the printed weights).
w = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]]).T
activations = np.array([0, 0.5, 1])

simulator.load_weights(w, 0)

# Single instruction: load the weights, feed the vector and flush the pipeline.
simulator.execute_instruction(
    data_vec=activations,
    load_new_weights=True,
    flush_pipeline=True,
    activation_enable=True,
    activation_func="relu",
)

print(f"Weights:\n{w}")
print(
    f"Systolic Array (weights loaded):\n{simulator.accelerator.inspect_systolic_array_state(simulator.sim)}"
)

# Two extra NOP steps before reading the outputs.
simulator.execute_instruction(nop=True)
simulator.execute_instruction(nop=True)

# Expected: activations @ w == [1.5, 3.0, 4.5]
results = simulator._get_outputs()
results

Weights:
[[1 2 3]
 [1 2 3]
 [1 2 3]]
Systolic Array (weights loaded):
Inputs:
  w_en: 0
  enable: 0
  weights: [0. 0. 0.]
  data: [0. 0. 0.]

Weights Matrix:
[[1. 2. 3.]
 [1. 2. 3.]
 [1. 2. 3.]]

Data Matrix:
[[0.  0.5 1. ]
 [0.5 1.  0. ]
 [1.  0.  0.5]]

Accumulators:
[[0.  1.  3. ]
 [0.5 3.  3. ]
 [0.  0.  0. ]]

Control Registers:
data_controls: [0, [0, 0]]
accum_controls: [0, 0, 1]
control_out: 0

Outputs:
[0. 0. 0.]
----------------------------------------



array([1.5, 3. , 4.5])

In [None]:
# Show the per-step snapshots recorded by the simulator (long output).
simulator.history

[
 Simulation Step 0
 Input Signals:
 --------------------------------------------------------------------------------
   data_enable: 0
   data_inputs: None
   weight_start: 1
   weight_tile_addr: 0
   accum_addr: 0
   accum_mode: 0
   act_start: 0
   act_func: 0
 
 Systolic Array State:
 --------------------------------------------------------------------------------
 Inputs:
   w_en: 0
   enable: 0
   weights: [0. 0. 0.]
   data: [0. 0. 0.]
 
 Weights Matrix:
 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]
 
 Data Matrix:
 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]
 
 Accumulators:
 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]
 
 Control Registers:
 data_controls: [0, [0, 0]]
 accum_controls: [0, 0, 0]
 control_out: 0
 
 Outputs:
 [0. 0. 0.]
 ----------------------------------------
 
 
 Accumulator State:
 --------------------------------------------------------------------------------
 [[0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]
  [0. 0. 0.]]
 
 Outputs

## Weight Loading


In [None]:
# Weight-loading trace: load two tiles back to back and print the systolic
# array's weight registers after every step.
# NOTE(review): the constructor below is commented out, so this cell reuses the
# `simulator` object left behind by an earlier cell and only calls setup() on it.
# simulator = AcceleratorSimulator.default_config(array_size=3, num_weight_tiles=2)
simulator.setup()

w = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
w_neg = w * -1

# Tile slot 0 holds w, tile slot 1 holds its negation.
simulator.load_weights(w, 0)
simulator.load_weights(w_neg, 1)

# Request tile 0, then tile 1, then one NOP.
simulator.execute_instruction(load_new_weights=True, weight_tile_addr=0)
simulator.execute_instruction(load_new_weights=True, weight_tile_addr=1)
simulator.execute_instruction(nop=True)

# Print the weight registers recorded at each simulation step.
for step in simulator.history:
    print(f"Step {step.step}\n{step.systolic_state.weights}\n")

Step 0
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]

Step 1
[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]

Step 2
[[3. 1. 2.]
 [0. 0. 0.]
 [0. 0. 0.]]

Step 3
[[2. 3. 1.]
 [3. 1. 2.]
 [0. 0. 0.]]

Step 4
[[1. 2. 3.]
 [2. 3. 1.]
 [3. 1. 2.]]

Step 5
[[1. 2. 3.]
 [2. 3. 1.]
 [3. 1. 2.]]

Step 6
[[-3. -1. -2.]
 [ 1.  2.  3.]
 [ 2.  3.  1.]]

Step 7
[[-2. -3. -1.]
 [-3. -1. -2.]
 [ 1.  2.  3.]]



# PyTorch Utilities


## Load MNIST Test Data


In [2]:
# Data transformation: convert images to tensors, then normalize the single
# channel with mean 0.1307 and std 0.3081 (presumably the MNIST statistics
# the model was trained with -- confirm against the training script).
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)
# Download the MNIST test split into ./data (relative to the working directory)
test_dataset = datasets.MNIST(
    root="./data", train=False, download=True, transform=transform
)
# batch_size=1 and shuffle=False: every fresh iterator starts at the same sample.
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

In [80]:
# Grab the first (image, label) batch from the test loader.
image, label = next(iter(test_loader))
# Flatten the image tensor into a 1-D numpy vector.
image = image.numpy().reshape(-1)
image.shape, label

((784,), tensor([7]))

In [84]:
def get_activation():
    """Return one test image as a flat numpy vector.

    Pulls the first batch from a fresh iterator over the notebook-level
    ``test_loader`` and discards the label. With the loader defined in this
    notebook (``batch_size=1``, ``shuffle=False``) every call yields the same
    sample.

    Returns:
        1-D ``np.ndarray`` holding the image's pixel values.
    """
    pixels, _label = next(iter(test_loader))
    return pixels.detach().numpy().reshape(-1)

## Initializing the trained PyTorch model


In [5]:
# Load the trained MLP checkpoint (path is relative to the working directory).
model = load_model("models/mlp_mnist.pth")
# Put the model in evaluation mode; the cell displays the module structure.
model.eval()

MLP(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [6]:
# List the names of the model's learnable parameters.
for name, param in model.named_parameters():
    print(name)

fc1.weight
fc1.bias
fc2.weight
fc2.bias


### Numpy only inference

In [7]:
inputs = image  # flattened numpy vector of the test image loaded above


# numpy softmax
def softmax(x: np.ndarray):
    """Softmax with the usual max-subtraction for numerical stability.

    The global maximum of ``x`` is subtracted before exponentiating so large
    inputs do not overflow. Normalization sums along axis 0, so a 1-D input
    yields a probability vector.

    Args:
        x: Array of scores.

    Returns:
        Array of the same shape as ``x`` whose entries sum to 1 along axis 0.
    """
    shifted_exp = np.exp(np.subtract(x, np.max(x)))
    return np.divide(shifted_exp, np.sum(shifted_exp, axis=0))


# Pull the two linear layers' parameters out of the PyTorch model as numpy arrays.
fc1_weight = model.fc1.weight.data.numpy()
fc1_bias = model.fc1.bias.data.numpy()
fc2_weight = model.fc2.weight.data.numpy()
fc2_bias = model.fc2.bias.data.numpy()

# Forward pass by hand: fc1 -> ReLU -> fc2 -> softmax.
# Weights are stored as (out_features, in_features), hence the transposes.
x = inputs @ fc1_weight.T + fc1_bias
x = np.maximum(0, x)
x = x @ fc2_weight.T + fc2_bias
x = softmax(x)

# get the index of the maximum value
predicted_class = np.argmax(x)
print(f"Predicted class: {predicted_class}")

Predicted class: 7


### Generalized parameter/layer extraction


In [51]:
def get_layer_info(model):
    """Describe a PyTorch model as a flat list of numpy-friendly layer dicts.

    Walks every module of ``model`` (the model itself included, so a bare
    ``nn.Linear`` is handled) in registration order and records the layer
    types understood by ``numpy_inference``. Other module types, such as
    containers, are ignored.

    Args:
        model: ``nn.Module`` to inspect.

    Returns:
        List of dicts, one per recognized layer:
            - ``{"type": "linear", "weight": ndarray, "bias": ndarray | None}``
            - ``{"type": "relu"}``
            - ``{"type": "flatten"}``
    """

    def as_numpy(tensor):
        # detach + cpu so parameters that require grad or live on an
        # accelerator device can still be converted.
        return tensor.detach().cpu().numpy()

    layers = []
    for module in model.modules():
        if isinstance(module, nn.Linear):
            layers.append(
                {
                    "type": "linear",
                    "weight": as_numpy(module.weight),
                    "bias": as_numpy(module.bias) if module.bias is not None else None,
                }
            )
        elif isinstance(module, nn.ReLU):
            layers.append({"type": "relu"})
        elif isinstance(module, nn.Flatten):
            layers.append({"type": "flatten"})
        # Add more layer types as needed
        # elif isinstance(module, nn.Conv2d):
        #     layers.append(...)

    return layers


def numpy_inference(image, layers):
    """Run a forward pass over ``layers`` using numpy only.

    Args:
        image: Input array; a "flatten" layer turns it into a 1-D vector.
        layers: Sequence of layer dicts as produced by ``get_layer_info``.
            Entries whose "type" is not "linear", "relu" or "flatten" are
            passed over without touching the activation.

    Returns:
        The activation after the last layer (the input itself if ``layers``
        is empty).
    """
    activation = image

    for spec in layers:
        kind = spec["type"]
        if kind == "flatten":
            # If input is already 1D, this is a no-op
            activation = activation.flatten()
        elif kind == "relu":
            activation = np.maximum(0, activation)
        elif kind == "linear":
            # Weight is (out_features, in_features), so it multiplies from the left.
            activation = np.dot(spec["weight"], activation)
            layer_bias = spec["bias"]
            if layer_bias is not None:
                activation = activation + layer_bias

    return activation


# Extract model structure
layers = get_layer_info(model)

# Print model structure. get_layer_info stores bias=None for bias-free Linear
# layers, so the bias shape is only read when a bias is present.
for i, layer in enumerate(layers):
    if layer["type"] == "linear":
        print(
            f"Layer {i}: Linear - Weight shape: {layer['weight'].shape}, "
            f"Bias shape: {layer['bias'].shape if layer['bias'] is not None else None}"
        )
    else:
        print(f"Layer {i}: {layer['type']}")

# Run inference on the flattened test image and pick the highest-scoring class
prediction = numpy_inference(image, layers)
predicted_class = np.argmax(prediction)
predicted_class

Layer 0: flatten
Layer 1: Linear - Weight shape: (128, 784), Bias shape: (128,)
Layer 2: relu
Layer 3: Linear - Weight shape: (10, 128), Bias shape: (10,)


np.int64(7)

# Running model on the accelerator sim


In [152]:
# Systolic array edge length; also used as the tile size when splitting
# matrices with generate_gemv_tiles in later cells.
CHUNK_SIZE = 4

# BF16 for data, weights and accumulators, with floating-point adder and
# multiplier units and pipelining disabled.
# NOTE(review): AcceleratorConfig, BF16, float_adder and float_multiplier are not
# imported by name; presumably they come from the star-imports at the top.
config = AcceleratorConfig(
    array_size=CHUNK_SIZE,
    num_weight_tiles=4,
    data_type=BF16,
    weight_type=BF16,
    accum_type=BF16,
    pe_adder=float_adder,
    pe_multiplier=float_multiplier,
    pipeline=False,
    accum_adder=float_adder,
    accum_addr_width=12,
)

# Rebinds `simulator`, replacing the 3x3 demo simulator from the cells above.
simulator = AcceleratorSimulator(config)

In [153]:
# Run the whole MLP on the simulated accelerator for one test image.
# NOTE(review): despite the name, the values shown below are non-negative and
# dominated by one entry near 1, i.e. they look like softmax probabilities rather
# than log-probabilities. The vector has 12 entries for a 10-class model --
# presumably padding up to a multiple of array_size; confirm.
sim_logprobs = simulator.simulate_pytorch_model(model, get_activation())
sim_logprobs, np.argmax(sim_logprobs)

fc1_weight: (128, 784)
fc1_bias: (128,)
fc2_weight: (10, 128)
fc2_bias: (10,)
inputs: (784,)
fc1_out: (128,)


(array([8.82158504e-07, 8.82158504e-07, 8.82158504e-07, 3.37657267e-03,
        1.00112238e-09, 3.99896932e-08, 8.47857112e-11, 9.96611666e-01,
        3.75668401e-08, 7.27170995e-06, 8.82158504e-07, 8.82158504e-07]),
 np.int64(7))

In [None]:
# Scratch check: range(1, 1) is empty, so the loop body never runs and nothing
# is printed.
for i in range(1, 1):
    print("sup")

In [87]:
# Extract layer weights and biases
weights_1 = model.fc1.weight.numpy(force=True)
bias_1 = model.fc1.bias.numpy(force=True)
weights_2 = model.fc2.weight.numpy(force=True)
bias_2 = model.fc2.bias.numpy(force=True)

# Add bias to first layer weights and 1 to activations.
# The input is the flattened MNIST `image` loaded earlier. The global
# `activations` is deliberately not used here: the GEMM/GEMV demo cells rebind
# it to small 3-element test arrays, which do not match fc1's 784 inputs when
# the notebook is run top to bottom.
W_aug, x_aug = bias_trick(weights_1, bias_1, image)

# Run the first layer
x1 = W_aug @ x_aug
act_1 = np.maximum(x1, 0)

# Add bias to second layer weights and 1 to previous output
W2_aug, x2_aug = bias_trick(weights_2, bias_2, act_1)

# Run the second layer
logits = W2_aug @ x2_aug
act_out2 = softmax(logits)
np.argmax(act_out2)

np.int64(7)

In [91]:
# First-layer pre-activation values (W_aug @ x_aug), before ReLU.
x1

array([ -1.57116914,  -2.77032444,   3.28895059,  -1.01215341,
        -4.06945554,   6.83449887,   6.74118985,  -7.18483314,
       -11.37407285,   2.53657868,   3.61376523, -11.53509028,
        -6.01232959,  -2.27680407,  -0.39357647,  -4.1662203 ,
        -8.5312461 ,  -4.7962629 ,  -3.57882952,  -2.13865806,
        -3.46204166,  -8.72168555,   3.8821389 ,  -4.17779366,
        -3.66286114,  -7.38268444,   4.42328787,   7.22147077,
       -10.46859517,  -9.09268379,  -8.3326673 ,   2.6980674 ,
        -2.97501624,   4.23912435,   0.27626068,   1.13510098,
        -5.67404258,   1.21173453, -10.13136841,  -4.10109907,
         5.33964005,  -8.45681775,  -5.93670053, -10.10284516,
        -7.46963125,   9.82199474, -11.86524081,  -1.43474744,
        -0.93775183,  -0.98235984,  -1.7065348 ,   9.04186471,
        -4.28922505,   7.01130101,  -5.97062107, -14.54885137,
        -0.39288433,  -3.25831139,  -1.2732585 ,  -2.79558465,
         0.51988318,  -8.51720984,  -3.81590056,  -5.05

In [97]:
# Re-compute the first layer tile by tile in numpy to validate the tiling
# produced by generate_gemv_tiles before involving the simulator.
partial = np.zeros(config.array_size)
result = []
tile_generator = generate_gemv_tiles(x_aug, W_aug, CHUNK_SIZE)

for tile in tile_generator:
    # tile.first starts a fresh partial sum; later tiles accumulate into it.
    if tile.first:
        partial = tile.matrix @ tile.vector
    else:
        partial += tile.matrix @ tile.vector

    # tile.last marks the end of an accumulation; keep the finished chunk.
    if tile.last:
        result.append(partial)

result = np.array(result).flatten()

# The stitched-together chunks should equal the dense product x1.
np.isclose(result, x1).all()

np.True_

In [None]:
# Count how many tiles the two layers produce at CHUNK_SIZE.
# The second layer uses x2_aug (the numpy reference activations plus the bias
# input) rather than fc1_aug, which is only defined by a later cell; both have
# the same length, which is what the tiling depends on.
tile_generator = generate_gemv_tiles(x_aug, W_aug, CHUNK_SIZE)
fc2_tile_generator = generate_gemv_tiles(x2_aug, W2_aug, CHUNK_SIZE)

# Exhaust both generators, counting one per tile.
total = sum(1 for _ in tile_generator) + sum(1 for _ in fc2_tile_generator)

total

6403

In [150]:
# Tile count from layer dimensions alone, for fc1 (784 -> 128) and fc2
# (128 -> 10) with tile size 4; it matches the 6403 tiles counted by iterating
# the generators.
count_total_gemv_tiles([(784, 128), (128, 10)], 4)

6403

In [None]:
# Number of elements in the two bias-augmented weight matrices (128 x 785 and
# 10 x 129) divided by the elements per tile. The actual tile count above is
# higher -- presumably because edge tiles are padded.
((128 * 785) + (129 * 10)) / (config.array_size**2)

6360.625

In [None]:
# Run both linear layers on the simulator by hand, one tile per instruction.
simulator.setup()
chunk_size = config.array_size  # NOTE(review): unused here; the calls below pass CHUNK_SIZE

tile_generator = generate_gemv_tiles(x_aug, W_aug, CHUNK_SIZE)

# Layer 1: every tile is written to weight slot 0 (transposed) and consumed
# immediately. Partial sums accumulate at accum_addr=tile.index; accumulation is
# off for the first tile of a chunk, and ReLU output is enabled on the last one.
for tile in tile_generator:
    simulator.load_weights(weights=tile.matrix.T, tile_addr=0)
    simulator.execute_instruction(
        load_new_weights=True,
        weight_tile_addr=0,
        data_vec=tile.vector,
        accum_addr=tile.index,
        accum_mode=not tile.first,
        activation_func="relu",
        activation_enable=tile.last,
        flush_pipeline=True,
    )

# Two NOP steps before reading the output trace.
simulator.execute_instruction(nop=True)
simulator.execute_instruction(nop=True)

sim_fc1 = np.array(simulator.output_trace)

sim_fc1  # , simulator.accelerator.inspect_accumulator_state(simulator.sim)[:4]

# Clear the trace so layer 2's outputs are collected separately.
# simulator.reset_output_trace()
simulator.output_trace = []

# Feed the simulated layer-1 outputs (plus the bias input) into layer 2.
W2_aug, fc1_aug = bias_trick(weights_2, bias_2, sim_fc1.flatten())

fc2_tile_generator = generate_gemv_tiles(fc1_aug, W2_aug, CHUNK_SIZE)

# Layer 2: same procedure, but no activation_func is passed.
for tile in fc2_tile_generator:
    simulator.load_weights(weights=tile.matrix.T, tile_addr=0)
    simulator.execute_instruction(
        load_new_weights=True,
        weight_tile_addr=0,
        data_vec=tile.vector,
        accum_addr=tile.index,
        accum_mode=not tile.first,
        activation_enable=tile.last,
        flush_pipeline=True,
    )

simulator.execute_instruction(nop=True)
simulator.execute_instruction(nop=True)

# Flattened layer-2 outputs (the logits fed to softmax in the next cell).
sim_fc2 = np.array(simulator.output_trace).flatten()
sim_fc2

array([[-1.484375  , -2.34375   ,  3.078125  , -1.5859375 ],
       [ 0.        ,  6.8125    ,  6.53125   ,  0.        ],
       [ 0.        ,  2.875     ,  3.3125    ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  3.6875    ,  0.        ],
       [ 0.        ,  0.        ,  4.21875   ,  7.46875   ],
       [ 0.        ,  0.        ,  0.        ,  2.484375  ],
       [ 0.        ,  4.625     ,  0.72265625,  1.015625  ],
       [ 0.        ,  1.        ,  0.        ,  0.        ],
       [ 5.09375   ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  9.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  7.53125   ],
       [ 0.        ,  6.5625    ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.

array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  8.25000000e+00,
       -6.78125000e+00, -3.09375000e+00, -9.25000000e+00,  1.39375000e+01,
       -3.15625000e+00,  2.10937500e+00,  9.69782839e-37,  9.69782839e-37])

In [113]:
# Turn the simulated layer-2 outputs into probabilities.
softmax(sim_fc2)

array([8.82158504e-07, 8.82158504e-07, 8.82158504e-07, 3.37657267e-03,
       1.00112238e-09, 3.99896932e-08, 8.47857112e-11, 9.96611666e-01,
       3.75668401e-08, 7.27170995e-06, 8.82158504e-07, 8.82158504e-07])

In [None]:
def bias_trick(weights: np.ndarray, bias: np.ndarray, x: np.ndarray) -> tuple:
    """Fold a layer's bias into its weight matrix (the "bias trick").

    The bias becomes an extra last column of the weight matrix and a constant 1
    is appended to the input, so ``aug_weights @ aug_input`` equals
    ``weights @ x + bias`` for a vector input.

    Args:
        weights: Weight matrix (output_dim, input_dim)
        bias: Bias vector (output_dim,)
        x: Input matrix (n_samples, input_dim) or vector (input_dim,)

    Returns:
        tuple: (augmented_weights, augmented_input) where:
            - augmented_weights: (output_dim, input_dim + 1)
            - augmented_input: (n_samples, input_dim + 1) or (input_dim + 1,)
    """
    augmented_weights = np.column_stack((weights, bias))

    if x.ndim != 1:
        # Batch of samples: give every row its own trailing 1.
        ones_column = np.ones(x.shape[0])
        augmented_input = np.column_stack((x, ones_column))
    else:
        # Single vector: just tack a 1 onto the end.
        augmented_input = np.concatenate((x, [1]))

    return augmented_weights, augmented_input

In [151]:
# Predicted class from the simulated outputs.
np.argmax(softmax(sim_fc2))

np.int64(7)

In [None]:
# Dense first-layer reference (pre-ReLU), for comparison with x1.
# Uses the explicitly named fc1 parameters and the flattened MNIST image instead
# of the generic globals `weights` / `activations` / `bias`, which the demo
# cells at the top rebind to 3x3 test data (and `bias` is never defined).
weights_1 @ image + bias_1

array([ -1.5711691 ,  -2.7703242 ,   3.2889502 ,  -1.0121531 ,
        -4.069455  ,   6.834499  ,   6.7411895 ,  -7.1848335 ,
       -11.374073  ,   2.536579  ,   3.6137648 , -11.5350895 ,
        -6.0123296 ,  -2.276804  ,  -0.3935763 ,  -4.1662207 ,
        -8.531246  ,  -4.7962627 ,  -3.5788298 ,  -2.138658  ,
        -3.462042  ,  -8.721685  ,   3.8821383 ,  -4.177794  ,
        -3.662861  ,  -7.382684  ,   4.4232874 ,   7.2214704 ,
       -10.4685955 ,  -9.092685  ,  -8.332668  ,   2.6980672 ,
        -2.9750164 ,   4.2391243 ,   0.2762604 ,   1.1351011 ,
        -5.6740427 ,   1.2117343 , -10.131369  ,  -4.1010985 ,
         5.3396406 ,  -8.456818  ,  -5.936701  , -10.102846  ,
        -7.4696317 ,   9.821995  , -11.865241  ,  -1.4347476 ,
        -0.93775195,  -0.9823608 ,  -1.7065349 ,   9.041865  ,
        -4.289225  ,   7.0113015 ,  -5.970621  , -14.548852  ,
        -0.39288452,  -3.2583108 ,  -1.2732584 ,  -2.7955844 ,
         0.5198832 ,  -8.51721   ,  -3.8159013 ,  -5.05

In [None]:
# Debug loop: run the tiles of the first output chunk through the simulator and
# print the numpy partial sum next to the simulated accumulator contents.
# NOTE(review): `activations` and `weights` are not assigned in this cell; on a
# top-to-bottom run they still hold the small demo arrays from the GEMM/GEMV
# cells. The printed output suggests they were the MNIST image and a layer
# weight matrix when this was executed -- confirm before relying on it.
simulator.setup()
# chunk_size = 16
chunk_size = config.array_size
tile_generator = generate_gemv_tiles(activations, weights, chunk_size)

result = []

for tile in tile_generator:
    # NOTE(review): this cell multiplies vector @ matrix and loads tile.matrix
    # untransposed, whereas other cells use matrix @ vector with tile.matrix.T;
    # the two conventions cannot both match one tile layout -- verify.
    # partial = tile.matrix @ tile.vector
    partial = tile.vector @ tile.matrix
    # Keep one running numpy sum per output chunk (tile.index).
    if tile.index >= len(result):
        result.append(partial)
    else:
        result[tile.index] += partial

    simulator.load_weights(weights=tile.matrix, tile_addr=0)
    simulator.execute_instruction(
        load_new_weights=True,
        weight_tile_addr=0,
        data_vec=tile.vector,
        accum_addr=tile.index,
        accum_mode=not tile.first,
        activation_func="relu",
        activation_enable=tile.last,
        flush_pipeline=True,
    )

    # One extra step, then read this chunk's row from the accumulator state.
    simulator.step()
    sim_partial = simulator.accelerator.inspect_accumulator_state(simulator.sim)[
        tile.index
    ]

    print(tile)
    print(f"Calculated partial:\n{result[tile.index]}\n")
    print(f"Simulation Partial Result:\n{sim_partial}\n\n")

    # Stop once the first output chunk is complete.
    if tile.last:
        break

GemvTile(
    index: 0
    first: True
    last: False
    matrix:
[[ 0.0067 -0.0223  0.0327 -0.0118]
 [ 0.0036  0.017   0.0161  0.018 ]
 [ 0.0055  0.018  -0.002  -0.0071]
 [-0.0195 -0.0147  0.0034 -0.0122]]
    vector:
[-0.4242 -0.4242 -0.4242 -0.4242]
    partial_result:
[ 0.0016  0.0008 -0.0213  0.0056]
)
Calculated partial:
[ 0.0015598   0.00084817 -0.02133417  0.00556452]

Simulation Partial Result:
[ 0.00158691  0.00091553 -0.02111816  0.0055542 ]


GemvTile(
    index: 0
    first: False
    last: False
    matrix:
[[ 0.0382 -0.0016  0.0227  0.0357]
 [-0.0001  0.0306 -0.0311  0.0168]
 [-0.0326 -0.0403 -0.0299 -0.0426]
 [ 0.018   0.0075 -0.0182 -0.0209]]
    vector:
[-0.4242 -0.4242 -0.4242 -0.4242]
    partial_result:
[-0.01    0.0016  0.0239  0.0047]
)
Calculated partial:
[-0.00840414  0.00243366  0.00258482  0.01024831]

Simulation Partial Result:
[-0.00842285  0.00245667  0.00256348  0.02416992]


GemvTile(
    index: 0
    first: False
    last: False
    matrix:
[[-0.0205  

In [None]:
# Flatten the numpy partial sums gathered by the debug loop.
result = np.array(result).flatten()

# simulator.execute_instruction(nop=True)
# simulator.execute_instruction(nop=True)
# Outputs the simulator has emitted so far.
sim_result = simulator.output_trace

# Print both for a side-by-side comparison.
print(f"{sim_result=}")
print(f"{result=}")

sim_result=[array([ 2.546875, -3.078125,  0.140625,  4.03125 ]), array([-2.046875, -1.84375 , -4.28125 , -1.6875  ])]
result=array([ 2.55966587, -3.09237437,  0.13641229,  4.04874201, -2.07218613,
       -2.25486009, -4.17895806, -1.71206021])


In [155]:
# Read the accumulator memory directly and keep the first 128 values (fc1 has
# 128 output features). This rebinds `sim_fc1`, which an earlier cell set from
# the output trace.
sim_fc1 = simulator.accelerator.inspect_accumulator_state(simulator.sim).flatten()[:128]

sim_fc1

array([ -1.453125  ,  -2.265625  ,  -4.        ,  -2.75      ,
         1.140625  ,   0.12792969,   1.09375   ,  -0.84765625,
        -0.5390625 ,  -1.2109375 ,  -4.53125   ,  -3.59375   ,
        -1.78125   ,  -0.80859375,  -2.96875   ,  -3.4375    ,
        -3.890625  ,  -2.9375    ,  -3.640625  ,  -3.875     ,
        -3.984375  ,  -2.5       ,  -2.96875   ,  -3.859375  ,
        -1.6015625 ,   2.953125  ,   0.953125  ,   1.1171875 ,
        -7.96875   , -10.        ,  -7.75      ,  -8.3125    ,
         1.8984375 ,   0.8203125 ,  -1.484375  ,   0.66796875,
        -2.671875  ,  -4.71875   ,  -6.15625   ,  -8.        ,
        -7.03125   ,  -5.875     ,  -4.25      ,  -4.375     ,
        -3.703125  ,  -6.90625   ,  -2.59375   ,  -3.859375  ,
        -2.765625  ,  -1.953125  ,   1.046875  ,   3.125     ,
         0.171875  ,   0.80859375,   1.21875   ,  -2.875     ,
        -1.765625  ,   0.84375   ,   0.11474609,  -0.27539062,
        -5.40625   ,  -0.88671875,   0.83984375,   0.65

In [None]:
def simulate_linear_layer(
    self,
    inputs: np.ndarray,
    weights: np.ndarray,
    bias: np.ndarray,
    activation_func=None,
) -> np.ndarray:
    """Run one fully connected layer on the accelerator simulator.

    The bias is folded into the weights (extra column) and a constant 1 is
    appended to the input, so the layer becomes a single matrix-vector product.
    That product is split into array-sized tiles; each tile is loaded and
    executed in turn, with partial sums accumulated per output chunk.

    Args:
        inputs: Layer input; flattened to a vector before use.
        weights: Weight matrix (out_features, in_features).
        bias: Bias vector (out_features,).
        activation_func: Activation name forwarded to the simulator (e.g.
            "relu"), or None.

    Returns:
        1-D array with ``out_features`` values taken from the simulator's
        output trace. The trace is cleared afterwards.
    """
    weights_bias = np.concatenate([weights, bias.reshape(-1, 1)], axis=1)
    activations = np.concatenate([inputs.flatten(), np.ones(1)])

    tile_generator = generate_gemv_tiles(
        activations, weights_bias, self.config.array_size
    )

    # Drive this simulator instance (not a notebook-level global) so the
    # instructions and the output trace read below belong to the same object.
    # NOTE(review): the manual simulation cell in this notebook loads
    # tile.matrix.T and issues two trailing NOPs; confirm which convention
    # matches the current generate_gemv_tiles.
    for tile in tile_generator:
        self.load_weights(weights=tile.matrix, tile_addr=0)
        self.execute_instruction(
            load_new_weights=True,
            weight_tile_addr=0,
            data_vec=tile.vector,
            accum_addr=tile.index,
            accum_mode=not tile.first,
            activation_func=activation_func,
            activation_enable=tile.last,
            flush_pipeline=True,
        )

    self.execute_instruction(nop=True)
    result = np.array(self.output_trace).flatten()
    self.output_trace = []

    # Outputs arrive in array-sized chunks, so the flattened trace can be longer
    # than the layer's output width; drop the trailing padding lanes.
    return result[: weights.shape[0]]


def simulate_pytorch_model(self, model: MLP, inputs: np.ndarray) -> np.ndarray:
    """Run a two-layer MLP through the accelerator simulator.

    Both linear layers (``fc1`` with ReLU, then ``fc2``) are executed with
    ``simulate_linear_layer``, and the result is passed through softmax.

    Args:
        model: PyTorch model to simulate; must expose ``fc1`` and ``fc2``
        inputs: Input data as numpy array

    Returns:
        Softmax of the simulated ``fc2`` outputs as numpy array
    """
    # Convert all four parameter tensors to numpy up front.
    fc1_w, fc1_b, fc2_w, fc2_b = (
        param.data.numpy(force=True)
        for param in (
            model.fc1.weight,
            model.fc1.bias,
            model.fc2.weight,
            model.fc2.bias,
        )
    )

    hidden = self.simulate_linear_layer(
        inputs=inputs, weights=fc1_w, bias=fc1_b, activation_func="relu"
    )
    scores = self.simulate_linear_layer(inputs=hidden, weights=fc2_w, bias=fc2_b)

    return softmax(scores)

linear
relu


  x = inputs @ weights.T + bias
  x = np.maximum(0, x)


TypeError: max() received an invalid combination of arguments - got (out=NoneType, axis=NoneType, ), but expected one of:
 * ()
 * (Tensor other)
 * (int dim, bool keepdim = False)
      didn't match because some of the keywords were incorrect: out, axis
 * (name dim, bool keepdim = False)
      didn't match because some of the keywords were incorrect: out, axis


# Testing the compiled simulator

In [158]:
from hardware_accelerators.simulation.accelerator import CompiledSimulator

In [161]:
# Clear PyRTL's working block before building the compiled simulator.
reset_working_block()
# NOTE(review): in the recorded run this failed while compiling the generated C
# code -- gcc rejected the '-m64' flag (see the error output below).
compsim = CompiledSimulator(config)

gcc: error: unrecognized command-line option ‘-m64’


CalledProcessError: Command '['gcc', '-O0', '-march=native', '-std=c99', '-m64', '-shared', '-fPIC', '/tmp/tmp8ym5v5vp/pyrtlsim.c', '-o', '/tmp/tmp8ym5v5vp/pyrtlsim.so']' returned non-zero exit status 1.

In [None]:
# Display the accelerator configuration in use.
config