8 changes: 5 additions & 3 deletions hw/ip/snitch_test/src/tb_bin.sv
@@ -46,11 +46,13 @@ module tb_bin;
if (exit_code == 0)
#200ns;
end while (exit_code == 0);

exit_code >>= 1;
if (exit_code > 0) begin
$error("[FAILURE] Finished with exit code %2d", exit_code);
end else begin

if (exit_code == 0) begin
$info("[SUCCESS] Program finished successfully");
end else begin
$error("[FAILURE] Finished with exit code %2d", exit_code);
end
$finish;
end
6 changes: 3 additions & 3 deletions hw/ip/snitch_test/src/verilator_lib.cc
@@ -32,10 +32,10 @@ int Sim::run() {
target.init(sim_thread_main, this);

int exit_code = htif_t::run();
if (exit_code > 0)
fprintf(stderr, "[FAILURE] Finished with exit code %2d\n", exit_code);
else
if (exit_code == 0)
fprintf(stderr, "[SUCCESS] Program finished successfully\n");
else
fprintf(stderr, "[FAILURE] Finished with exit code %2d\n", exit_code);
return exit_code;
}

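Both hunks above flip the success check so that only an exit code of exactly zero is reported as success; the SystemVerilog testbench additionally shifts the raw exit value right by one before testing it. A minimal sketch of that decode in Python, assuming the standard RISC-V HTIF convention (bit 0 of `tohost` flags completion, the remaining bits carry the exit code); the helper name is hypothetical and not part of this PR:

```python
def decode_tohost(tohost: int) -> tuple[bool, int]:
    """Split a raw HTIF tohost value into (done, exit_code).

    Bit 0 signals completion; the remaining bits carry the exit code,
    so a finished, successful run yields exit_code == 0.
    """
    done = bool(tohost & 1)
    exit_code = tohost >> 1
    return done, exit_code

# Example: tohost == 1 -> finished, exit code 0 (success);
#          tohost == 5 -> finished, exit code 2 (failure).
```

The stricter `== 0` check also means a negative exit code would now be reported as a failure rather than a success.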
14 changes: 14 additions & 0 deletions hw/system/spatz_cluster/Makefile
@@ -133,6 +133,20 @@ spatz.gendata:
fi \
done

.PHONY: spatz.cleandata
spatz.cleandata:
@for benchmark_dir in $(ROOT)/sw/spatzBenchmarks/*/; do \
data_dir="$$benchmark_dir/data"; \
if [ -d "$$data_dir" ]; then \
data_count=$$(find "$$data_dir" -name 'data*.h' -type f 2>/dev/null | wc -l); \
if [ "$$data_count" -gt 0 ]; then \
echo "Cleaning $$data_count data file(s) from $$data_dir"; \
rm -f "$$data_dir"/data*.h; \
fi \
fi \
done
@echo "All benchmark data cleaned."

#############
# Verilator #
#############
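The new `spatz.cleandata` target complements `spatz.gendata`: it walks every benchmark directory under `sw/spatzBenchmarks`, counts any generated `data*.h` headers in its `data/` subdirectory, and deletes them. A rough Python equivalent of the same sweep, shown only to illustrate the behaviour (the shell loop above is the actual implementation; the directory layout is taken from it):

```python
from pathlib import Path

def clean_benchmark_data(root: Path) -> None:
    """Remove generated data*.h headers from every benchmark's data/ directory."""
    for benchmark_dir in (root / "sw" / "spatzBenchmarks").iterdir():
        data_dir = benchmark_dir / "data"
        if not data_dir.is_dir():
            continue
        headers = sorted(data_dir.glob("data*.h"))
        if headers:
            print(f"Cleaning {len(headers)} data file(s) from {data_dir}")
            for header in headers:
                header.unlink()
    print("All benchmark data cleaned.")
```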
3 changes: 3 additions & 0 deletions sw/spatzBenchmarks/CMakeLists.txt
@@ -110,6 +110,7 @@ if (ELEN EQUAL 64)
add_spatz_test_threeParam(dp-fmatmul dp-fmatmul/main.c 64 64 64 )

add_spatz_test_twoParam_type(dp-gemv gemv/main.c 64 128 64)
add_spatz_test_threeParam_type(dp-sa-gemv sa-gemv/main.c 256 128 16 64)

add_spatz_test_oneParam(dp-faxpy dp-faxpy/main.c 256)
add_spatz_test_oneParam(dp-faxpy dp-faxpy/main.c 1024)
@@ -121,6 +122,7 @@ if (ELEN EQUAL 64)
add_spatz_test_threeParam(dp-fconv2d dp-fconv2d/main.c 64 64 7)

add_spatz_test_twoParam(dp-fft dp-fft/main.c 128 2)

endif()

add_spatz_test_threeParam(sp-fmatmul sp-fmatmul/main.c 64 64 64 )
@@ -150,6 +152,7 @@ add_spatz_test_threeParam(sdotp-bp-fmatmul sdotp-bp-fmatmul/main.c 128 256 128)

add_spatz_test_twoParam_type(sp-gemv gemv/main.c 128 128 32)
add_spatz_test_twoParam_type(hp-gemv gemv/main.c 256 128 16)
add_spatz_test_threeParam_type(hp-sa-gemv sa-gemv/main.c 128 4096 512 16)

add_spatz_test_twoParam(sp-fft sp-fft/main.c 256 2)
add_spatz_test_twoParam(sp-fft sp-fft/main.c 512 2)
20 changes: 8 additions & 12 deletions sw/spatzBenchmarks/gemv/script/gen_data.py
@@ -23,7 +23,8 @@ def array_to_cstr(a, fmt=float):
if isinstance(a, np.ndarray):
a = list(a.flat)
elif isinstance(a, torch.Tensor):
a = a.numpy().flatten().tolist()
# Universal Fix: Cast to float32 before sending to NumPy/C-string to avoid formatting errors
a = a.float().numpy().flatten().tolist()
else:
a = list(a)
for i, el in enumerate(a):
@@ -121,9 +122,11 @@ def rand_data_generator(shape, prec, alt=False):
return torch.randn(shape, requires_grad=False, dtype=torch.float32), {}
elif prec == 16:
if alt:
return torch.randn(shape, requires_grad=False, dtype=torch.bfloat16), {}
# Universal Fix: Generate FP32, cast to BF16
return torch.randn(shape, requires_grad=False, dtype=torch.float32).to(torch.bfloat16), {}
else:
return torch.randn(shape, requires_grad=False, dtype=torch.float16), {}
# Universal Fix: Generate FP32, cast to FP16
return torch.randn(shape, requires_grad=False, dtype=torch.float32).to(torch.float16), {}
elif prec == 8:
sign = torch.randint(
0, 2, shape, requires_grad=False, dtype=torch.uint8
@@ -142,15 +145,8 @@ def rand_data_generator(shape, prec, alt=False):


def gemv(a, b):
# PyTorch doesn't support matmul for float16 on CPU, so convert to float32
original_dtype = a.dtype
if original_dtype == torch.float16:
a = a.float()
b = b.float()
result = torch.matmul(a, b)
if original_dtype == torch.float16:
result = result.half()
return result
# Universal Fix: One-liner upcast and downcast
return torch.matmul(a.float(), b.float()).to(a.dtype)


def main():
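The three hunks above apply one pattern: NumPy has no bfloat16 dtype, and PyTorch's CPU backend has historically lacked some half-precision kernels (the removed comment cites float16 matmul), so tensors are created and computed in float32 and only cast to the narrow type at the boundaries. A small illustration of the failure mode and the workaround, using the same PyTorch calls as the script (illustration only, not part of the diff):

```python
import torch

a = torch.randn(4, 4, dtype=torch.float32).to(torch.bfloat16)
b = torch.randn(4, dtype=torch.float32).to(torch.bfloat16)

# a.numpy() raises TypeError: NumPy has no bfloat16 representation,
# so upcast before handing the values to array_to_cstr-style formatting.
vals = a.float().numpy().flatten().tolist()

# Same idea for compute: run the matmul in float32, cast the result back.
result = torch.matmul(a.float(), b.float()).to(a.dtype)
```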
102 changes: 63 additions & 39 deletions sw/spatzBenchmarks/hp-fmatmul/script/gen_data.py
@@ -26,6 +26,9 @@ def array_to_cstr(a, fmt=float):
if isinstance(a, np.ndarray):
a = a.flat
if isinstance(a, torch.Tensor):
# Fallback to float32 if numpy struggles with bfloat16, otherwise leave alone
if a.dtype == torch.bfloat16:
a = a.float()
a = a.cpu().numpy().flat
for el in a:
out += "{}, ".format(el)
@@ -96,7 +99,7 @@ def emit_conv2d_layer(name="conv2d", **kwargs):
layer_str += f'static double {name}_result[{oh}][{ow}][{co}] __attribute__((section(".data")));\n\n'
layer_str += (
f"static double {name}_checksum[{oh}][{ow}] = "
+ array_to_cstr(torch.sum(ofmap, dim=-1))
+ array_to_cstr(torch.sum(ofmap.float(), dim=-1))
+ ";\n\n\n"
)
layer_str += (
@@ -215,7 +218,7 @@ def emit_batchnorm_layer(name="batchnorm", **kwargs):
layer_str += f'static double {name}_result[{oh}][{ow}][{co}] __attribute__((section(".data")));\n\n'
layer_str += (
f"static double {name}_checksum[{oh}][{ow}] = "
+ array_to_cstr(torch.sum(ofmap, dim=-1))
+ array_to_cstr(torch.sum(ofmap.float(), dim=-1))
+ ";\n\n\n"
)
layer_str += (
@@ -263,7 +266,7 @@ def emit_maxpool_layer(name="maxpool", **kwargs):
layer_str += f'static double {name}_result[{oh}][{ow}][{co}] __attribute__((section(".data")));\n\n'
layer_str += (
f"static double {name}_checksum[{oh}][{ow}] = "
+ array_to_cstr(torch.sum(ofmap, dim=-1))
+ array_to_cstr(torch.sum(ofmap.float(), dim=-1))
+ ";\n\n\n"
)
layer_str += (
@@ -363,14 +366,9 @@ def rand_data_generator(shape, prec, alt=False):
return torch.randn(shape, requires_grad=False, dtype=torch.float32), {}
elif prec == 16:
if alt:
return torch.randn(shape, requires_grad=False, dtype=torch.bfloat16), {}
return torch.randn(shape, requires_grad=False, dtype=torch.float32).to(torch.bfloat16), {}
else:
return (
torch.randn(
shape, requires_grad=False, dtype=torch.float16, device=device
),
{},
)
return torch.randn(shape, requires_grad=False, dtype=torch.float32, device=device).to(torch.float16), {}
elif prec == 8:
sign = torch.randint(
0, 2, shape, requires_grad=False, dtype=torch.uint8
@@ -389,6 +387,10 @@


def conv2d(ifmap, weights, padding=1, stride=1):
orig_dtype = ifmap.dtype
if orig_dtype in [torch.float16, torch.bfloat16]:
ifmap, weights = ifmap.float(), weights.float()

n, ci, ih, iw = ifmap.shape
co, _, fh, fw = weights.shape

@@ -399,34 +401,48 @@
)
ofmap = conv2d(ifmap)

return ofmap
return ofmap.to(orig_dtype)


def max_pooling(ifmap, kernel):
orig_dtype = ifmap.dtype
if orig_dtype in [torch.float16, torch.bfloat16]:
ifmap = ifmap.float()

n, ci, ih, iw = ifmap.shape
max_pool = nn.MaxPool2d(kernel_size=kernel)
ofmap = max_pool(ifmap)

return ofmap
return ofmap.to(orig_dtype)


def batchnorm(ifmap):
orig_dtype = ifmap.dtype
if orig_dtype in [torch.float16, torch.bfloat16]:
ifmap = ifmap.float()

n, ci, ih, iw = ifmap.shape
bn = torch.nn.BatchNorm2d(ci)
bn.weight.requires_grad = False
bn.bias.requires_grad = False
running_mean = torch.randn_like(bn.running_mean, requires_grad=False)
running_var = torch.rand_like(bn.running_var, requires_grad=False)

running_mean = torch.randn(bn.running_mean.shape, requires_grad=False, dtype=torch.float32).to(bn.running_mean.dtype)
running_var = torch.rand(bn.running_var.shape, requires_grad=False, dtype=torch.float32).to(bn.running_var.dtype)

gamma = bn.weight / torch.sqrt(running_var + bn.eps)
beta = bn.bias - running_mean * bn.weight / torch.sqrt(running_var + bn.eps)
ofmap = ifmap * gamma.unsqueeze(-1).unsqueeze(-1) + beta.unsqueeze(-1).unsqueeze(-1)

return ofmap, gamma, beta
return ofmap.to(orig_dtype), gamma.to(orig_dtype), beta.to(orig_dtype)


def fused_conv(
ifmap, weights, bn_k, bn_l, padding, stride, bn, relu, accumulate, depthwise
):
orig_dtype = ifmap.dtype
is_half = orig_dtype in [torch.float16, torch.bfloat16]
if is_half:
ifmap, weights, bn_k, bn_l = ifmap.float(), weights.float(), bn_k.float(), bn_l.float()

ih, iw, ci = ifmap.shape
if not depthwise:
@@ -459,7 +475,7 @@ def fused_conv(
co,
)
if accumulate:
ofmap_before = torch.randn_like(ofmap, requires_grad=False)
ofmap_before = torch.randn(ofmap.shape, requires_grad=False, dtype=torch.float32).to(ofmap.dtype)
else:
ofmap_before = torch.zeros_like(ofmap, requires_grad=False)

@@ -499,6 +515,11 @@ def fused_conv(
if relu:
ofmap = torch.nn.functional.relu(ofmap)

if is_half:
ofmap = ofmap.to(orig_dtype)
ofmap_before = ofmap_before.to(orig_dtype)
ifmap_padded = ifmap_padded.to(orig_dtype)

return ofmap, ofmap_before, ifmap_padded


@@ -538,16 +559,16 @@ def main():
param["input_dim"]["height"],
param["input_dim"]["width"],
requires_grad=False,
dtype=dtype,
)
dtype=torch.float32,
).to(dtype)
weights = torch.randn(
param["channels"]["out"],
param["channels"]["in"],
param["filter"]["height"],
param["filter"]["width"],
requires_grad=False,
dtype=dtype,
)
dtype=torch.float32,
).to(dtype)

ofmap = conv2d(
ifmap,
@@ -568,7 +589,10 @@
mat_B, bits_B = rand_data_generator((param["K"], param["N"]), param["prec"])
mat_C, bits_C = rand_data_generator((param["M"], param["N"]), param["prec"])

result = torch.matmul(mat_A, mat_B)
if mat_A.dtype in [torch.float16, torch.bfloat16]:
result = torch.matmul(mat_A.float(), mat_B.float()).to(mat_A.dtype)
else:
result = torch.matmul(mat_A, mat_B)

if param["transpose_A"]:
mat_A = mat_A.T
@@ -602,8 +626,8 @@ def main():
param["input_dim"]["height"],
param["input_dim"]["width"],
requires_grad=False,
dtype=dtype,
)
dtype=torch.float32,
).to(dtype)

ofmap, gamma, beta = batchnorm(ifmap)

@@ -621,8 +645,8 @@
param["input_dim"]["height"],
param["input_dim"]["width"],
requires_grad=False,
dtype=dtype,
)
dtype=torch.float32,
).to(dtype)

ofmap = max_pooling(ifmap, param["kernel_size"])

@@ -639,40 +663,40 @@
param["dim_in_x"],
param["ch_in"],
requires_grad=False,
dtype=dtype,
)
dtype=torch.float32,
).to(dtype)
if not param["depthwise"]:
kernel = torch.randn(
param["ch_out"],
param["dim_kernel_y"],
param["dim_kernel_x"],
param["ch_in"],
requires_grad=False,
dtype=dtype,
)
dtype=torch.float32,
).to(dtype)
else:
kernel = torch.randn(
param["dim_kernel_y"],
param["dim_kernel_x"],
param["ch_in"],
requires_grad=False,
dtype=dtype,
)
dtype=torch.float32,
).to(dtype)

bn_k = torch.randn(param["ch_out"], requires_grad=False)
bn_l = torch.randn(param["ch_out"], requires_grad=False)
bn_k = torch.randn(param["ch_out"], requires_grad=False, dtype=torch.float32).to(dtype)
bn_l = torch.randn(param["ch_out"], requires_grad=False, dtype=torch.float32).to(dtype)

ofmap, ofmap_before, ifmap_padded = fused_conv(
ifmap,
kernel,
bn_k,
bn_l,
param["padding"],
param["stride"],
param["flags"]["flag_batch_norm"],
param["flags"]["flag_relu"],
not param["flags"]["flag_y_accumulate_start"],
param["depthwise"],
padding=param["padding"],
stride=param["stride"],
bn=param["flags"]["flag_batch_norm"],
relu=param["flags"]["flag_relu"],
accumulate=not param["flags"]["flag_y_accumulate_start"],
depthwise=param["depthwise"],
)

if param["chw_layer"]:
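The `conv2d`, `max_pooling`, `batchnorm`, and `fused_conv` changes above all repeat the same wrapper: remember the incoming dtype, promote float16/bfloat16 inputs to float32, run the operator, and cast the results back down. If more operators need it, the pattern could be factored into a small decorator along these lines (a sketch under the assumption that the wrapped function returns a single tensor; the decorator name is invented here, not part of the PR):

```python
import torch

def fp32_compute(fn):
    """Run fn in float32 when its tensor arguments are half precision, then cast back."""
    def wrapper(*args, **kwargs):
        tensor_dtypes = [a.dtype for a in args if isinstance(a, torch.Tensor)]
        orig = tensor_dtypes[0] if tensor_dtypes else torch.float32
        if orig in (torch.float16, torch.bfloat16):
            args = tuple(a.float() if isinstance(a, torch.Tensor) else a for a in args)
        out = fn(*args, **kwargs)
        if orig in (torch.float16, torch.bfloat16):
            out = out.to(orig)
        return out
    return wrapper

@fp32_compute
def gemv(a, b):
    # Equivalent to the one-liner in gemv/script/gen_data.py.
    return torch.matmul(a, b)
```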
16 changes: 16 additions & 0 deletions sw/spatzBenchmarks/sa-gemv/data/layer.h
@@ -0,0 +1,16 @@
// Copyright 2025 ETH Zurich and University of Bologna.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include <stdint.h>

typedef enum { FP64 = 8, FP32 = 4, FP16 = 2, FP8 = 1 } precision_t;

typedef struct gemv_layer_struct {
uint32_t M;
uint32_t N;

precision_t dtype;
} gemv_layer;