In [17]:
from pyrtl import CompiledSimulation, reset_working_block
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from hardware_accelerators import *
from hardware_accelerators.simulation.compile import (
    ReusableCompiledSimulation,
    CompiledAccelerator,
)
from hardware_accelerators.simulation.compile import CompiledAcceleratorSimulator
from hardware_accelerators.rtllib.accelerator import CompiledAcceleratorConfig
from hardware_accelerators.nn import load_model

## Test loading from a saved simulation


In [18]:
# Preprocessing applied to every image: convert to a tensor, then normalize
# with mean 0.1307 and standard deviation 0.3081.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=(0.1307,), std=(0.3081,))]
)

# MNIST test split (train=False), downloaded into ./data when missing.
test_dataset = datasets.MNIST(
    "./data",
    train=False,
    transform=transform,
    download=True,
)

# Loader that serves one image at a time, in dataset order.
test_loader = DataLoader(dataset=test_dataset, batch_size=1, shuffle=False)


def get_batch(batch_size):
    """Return the first batch of ``test_dataset`` as flattened NumPy arrays.

    Args:
        batch_size: Number of samples requested from the start of
            ``test_dataset`` (no shuffling).

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays. ``images`` has one row
        per sample the loader actually returned, with every remaining
        dimension flattened into that row; ``labels`` holds the matching
        labels.
    """
    loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    batch, labels = next(iter(loader))
    # Flatten by the number of rows actually produced, not by the requested
    # ``batch_size``: when the dataset holds fewer samples than requested,
    # reshaping to ``batch_size`` rows would either raise or silently produce
    # rows of the wrong width.
    return batch.reshape(batch.shape[0], -1).numpy(), labels.numpy()


def get_activation():
    """Return the first batch from a fresh pass over ``test_loader``, flattened.

    Returns:
        A one-dimensional NumPy array holding every value of that batch.
    """
    first_batch, _labels = next(iter(test_loader))
    return first_batch.detach().numpy().reshape(-1)

In [19]:
# Value passed as the accelerator's `array_size`.
CHUNK_SIZE = 4

# BF16 activations and weights, multiplied with `float_multiplier`.
config = CompiledAcceleratorConfig(
    multiplier=float_multiplier,
    weight_type=BF16,
    activation_type=BF16,
    array_size=CHUNK_SIZE,
)

In [20]:
# Reset PyRTL's global working block so it is empty before the simulator is built.
reset_working_block()

In [21]:
# Load the model stored at models/mlp_mnist.pth.
model = load_model("models/mlp_mnist.pth")
# Build the accelerator simulator for `config`; the cell output reports that a
# precompiled library is picked up.
acc_sim = CompiledAcceleratorSimulator(config)

  model.load_state_dict(torch.load(model_path, map_location=device))


Using precompiled library: /Users/kaibreese/UCSD/dsc180b/hardware-accelerators/hardware_accelerators/lib/wb16ab16s4/pyrtlsim.so


In [27]:
# Run the MLP through the accelerator simulator on the flattened activation
# returned by get_activation().
# NOTE(review): the recorded output has 12 entries although MNIST has 10
# classes — presumably padded to a multiple of the array size; confirm
# against run_mlp before relying on the last two entries.
results = acc_sim.run_mlp(model, get_activation())
results

array([1.87383124e-06, 1.87383124e-06, 1.87383124e-06, 6.69541272e-06,
       1.87383124e-06, 1.87383124e-06, 1.87383124e-06, 9.99974566e-01,
       1.87383124e-06, 1.87383124e-06, 1.87383124e-06, 1.87383124e-06])

In [28]:
# Index of the largest entry in `results`.
torch.Tensor(results).argmax()

tensor(7)

In [11]:
# NOTE(review): `loaded_sim` is not defined in any cell visible in this
# notebook, and this cell's execution count (11) is lower than the cells
# before it, so it appears to be left over from an earlier session. Unless
# `loaded_sim` is defined elsewhere, this raises NameError on a fresh kernel;
# define it above (and run a simulation step first) or remove this cell.
loaded_sim.inspect_outputs()

PyrtlError: No context available. Please run a simulation step

# Compiling all configurations


In [None]:
from itertools import product
from typing import Callable, Iterator, List, Optional, Type

from hardware_accelerators.dtypes import *


def generate_accelerator_configs(
    array_size: int = 16,
    dtypes: Optional[List[Type[BaseFloat]]] = None,
    multipliers: Optional[List[Callable]] = None,
) -> Iterator[CompiledAcceleratorConfig]:
    """
    Generate all valid CompiledAcceleratorConfig combinations.

    Args:
        array_size: Size of the systolic array
        dtypes: Data types to consider. Defaults to
            [Float8, BF16, Float16, Float32]. Repeated entries are ignored.
        multipliers: Multiplier functions. Defaults to
            [float_multiplier, lmul_fast]

    Yields:
        Valid CompiledAcceleratorConfig objects, grouped by multiplier, then
        by weight type in ascending bitwidth, then by activation type in
        ascending bitwidth. Types of equal bitwidth keep the order in which
        they appear in ``dtypes``.

    Restrictions:
        1. The activation_type must be greater than or equal to the weight_type in terms of bitwidth.
        2. Two different 16-bit float types (e.g. BF16 and Float16) are never
           combined with each other; a 16-bit type is only paired with itself
           or with a type of a different bitwidth.
    """
    if dtypes is None:
        dtypes = [Float8, BF16, Float16, Float32]

    if multipliers is None:
        multipliers = [float_multiplier, lmul_fast]

    # Materialize once and drop repeats (keeping first-seen order): the types
    # are traversed several times below, so a one-shot iterable would
    # otherwise be exhausted after the first pass, and a repeated type would
    # produce duplicate configurations.
    dtypes = list(dict.fromkeys(dtypes))

    # Sort dtypes by bitwidth for easier comparison (stable sort keeps ties in
    # their input order).
    dtype_bitwidths = {dtype: dtype.bitwidth() for dtype in dtypes}
    sorted_dtypes = sorted(dtypes, key=dtype_bitwidths.__getitem__)

    sixteen_bits = 16

    def _is_valid_pair(weight_type, activation_type) -> bool:
        """Return True when the weight/activation pair satisfies both restrictions."""
        weight_bits = dtype_bitwidths[weight_type]
        activation_bits = dtype_bitwidths[activation_type]
        if activation_bits < weight_bits:
            return False
        mixes_16bit_formats = (
            weight_bits == sixteen_bits
            and activation_bits == sixteen_bits
            and weight_type != activation_type
        )
        return not mixes_16bit_formats

    # The valid (weight, activation) pairs do not depend on the multiplier, so
    # compute them once instead of once per multiplier.
    valid_pairs = [
        (weight_type, activation_type)
        for weight_type in sorted_dtypes
        for activation_type in sorted_dtypes
        if _is_valid_pair(weight_type, activation_type)
    ]

    for multiplier in multipliers:
        for weight_type, activation_type in valid_pairs:
            yield CompiledAcceleratorConfig(
                array_size=array_size,
                activation_type=activation_type,
                weight_type=weight_type,
                multiplier=multiplier,
            )


# Example usage:
def print_all_configs():
    """Print a numbered summary of each config from generate_accelerator_configs().

    Each entry lists the array size, the activation and weight type names and
    the multiplier function name, followed by a blank line.
    """
    for number, cfg in enumerate(generate_accelerator_configs(), start=1):
        summary_lines = [
            f"Config {number}:",
            f"  Array Size: {cfg.array_size}",
            f"  Activation Type: {cfg.activation_type.__name__}",
            f"  Weight Type: {cfg.weight_type.__name__}",
            f"  Multiplier: {cfg.multiplier.__name__}",
            "",
        ]
        print("\n".join(summary_lines))

In [32]:
# List every configuration generated with the default arguments.
print_all_configs()

Config 1:
  Array Size: 16
  Activation Type: Float8
  Weight Type: Float8
  Multiplier: float_multiplier

Config 2:
  Array Size: 16
  Activation Type: BF16
  Weight Type: Float8
  Multiplier: float_multiplier

Config 3:
  Array Size: 16
  Activation Type: Float16
  Weight Type: Float8
  Multiplier: float_multiplier

Config 4:
  Array Size: 16
  Activation Type: Float32
  Weight Type: Float8
  Multiplier: float_multiplier

Config 5:
  Array Size: 16
  Activation Type: BF16
  Weight Type: BF16
  Multiplier: float_multiplier

Config 6:
  Array Size: 16
  Activation Type: Float32
  Weight Type: BF16
  Multiplier: float_multiplier

Config 7:
  Array Size: 16
  Activation Type: Float16
  Weight Type: Float16
  Multiplier: float_multiplier

Config 8:
  Array Size: 16
  Activation Type: Float32
  Weight Type: Float16
  Multiplier: float_multiplier

Config 9:
  Array Size: 16
  Activation Type: Float32
  Weight Type: Float32
  Multiplier: float_multiplier

Config 10:
  Array Size: 16
  Activa