In [1]:
import onnx
from onnx import helper
from onnx import TensorProto
import numpy as np

In [2]:
def make_layer(in_dim, out_dim, idx):
    # Create one input (ValueInfoPro    to)
    X = helper.make_tensor_value_info(f"X_{idx}", TensorProto.INT8, [in_dim])
    w = helper.make_tensor(f"weight_{idx}", TensorProto.INT8, [out_dim, in_dim], np.random.randint(-128, 128, (out_dim, in_dim)).astype(np.int8).tobytes(), raw=True)
    b = helper.make_tensor(f"bias_{idx}", TensorProto.INT32, [out_dim], np.random.randint(-128, 128, (out_dim)).astype(np.int32).tobytes(),  raw=True)

    # Create one output (ValueInfoProto)
    Y = helper.make_tensor_value_info(f"Y_{idx}", TensorProto.INT8, [out_dim])

    mmnode = helper.make_node(
        "MatMulInteger",
        [f"X_{idx}", f"weight_{idx}"],
        [f"XMM32_{idx}"],
        name=f"mm_{idx}"
    )

    biasnode = helper.make_node(
        "Add",
        [f"XMM32_{idx}", f"bias_{idx}"],
        [f"XMMB32_{idx}"],
        name=f"add_{idx}"
    )

    castnode = helper.make_node( #FIXME: cast truncates bits
        "Cast",
        [f"XMMB32_{idx}"],
        [f"XMMB8_{idx}"],
        to=TensorProto.INT8
    )

    relunode = helper.make_node(
        "Relu",
        [f"XMMB8_{idx}"],
        [f"Y_{idx}"],
        name=f"relu_{idx}"
    )

    # Create the graph (GraphProto)
    graph_def = helper.make_graph(
        [mmnode,  biasnode, castnode, relunode],
        f"testmodel_{idx}",
        [X],
        [Y],
        [w, b]
    )

    # Create the model (ModelProto)
    opset = onnx.OperatorSetIdProto()
    opset.version = 19
    return helper.make_model(graph_def, opset_imports = [opset])

model_def = make_layer(8, 8, 0)
print(f"The model is:\n{model_def}")
onnx.checker.check_model(model_def)
onnx.shape_inference.infer_shapes(model_def, check_type=True, strict_mode=True, data_prop=True)
print("The model is checked!")

The model is:
ir_version: 9
graph {
  node {
    input: "X_0"
    input: "weight_0"
    output: "XMM32_0"
    name: "mm_0"
    op_type: "MatMulInteger"
  }
  node {
    input: "XMM32_0"
    input: "bias_0"
    output: "XMMB32_0"
    name: "add_0"
    op_type: "Add"
  }
  node {
    input: "XMMB32_0"
    output: "XMMB8_0"
    op_type: "Cast"
    attribute {
      name: "to"
      i: 3
      type: INT
    }
  }
  node {
    input: "XMMB8_0"
    output: "Y_0"
    name: "relu_0"
    op_type: "Relu"
  }
  name: "testmodel_0"
  initializer {
    dims: 8
    dims: 8
    data_type: 3
    name: "weight_0"
    raw_data: "\022\326D\005p\332\204\227r.\322F\351:i\031\242\252J\261$\363\333`\233\265\374&\202\022\txw\334\274\332\000\302A\225\347>\2307\374\226v\346\311\323\363F\035\260\n \312\312d\260\013\266,\002"
  }
  initializer {
    dims: 8
    data_type: 6
    name: "bias_0"
    raw_data: "\270\377\377\377\230\377\377\377\375\377\377\377\367\377\377\377\016\000\000\000b\000\000\000\034\000\000\0

In [3]:
def make_multilayer(n_layers, layer_widths = None):
    if layer_widths == None:
        layer_widths = [n_layers for _ in range(2*n_layers)]
    layers = [make_layer(layer_widths[2*i], layer_widths[2*i+1], i) for i in range(n_layers)]
    n_params = sum(layer_widths[2*i]*layer_widths[2*i + 1] + layer_widths[2*i+1] for i in range(n_layers//2))
    model = layers[0]
    for idx, l in enumerate(layers[1:-1]):
        model = onnx.compose.merge_models(model, l, [(f"Y_{idx}", f"X_{idx+1}")])
    if n_layers > 1:
        model = onnx.compose.merge_models(model, layers[-1], [(f"Y_{n_layers-2}", f"X_{n_layers-1}")])
    return model, n_params
print([make_multilayer(i)[1] for i in range(2, 11)])
onnx.checker.check_model(make_multilayer(3)[0])
print(list(make_multilayer(3, [N, N, N, N, N, N, N])[1] for N in range(10, 250)))

[6, 12, 40, 60, 126, 168, 288, 360, 550]
[110, 132, 156, 182, 210, 240, 272, 306, 342, 380, 420, 462, 506, 552, 600, 650, 702, 756, 812, 870, 930, 992, 1056, 1122, 1190, 1260, 1332, 1406, 1482, 1560, 1640, 1722, 1806, 1892, 1980, 2070, 2162, 2256, 2352, 2450, 2550, 2652, 2756, 2862, 2970, 3080, 3192, 3306, 3422, 3540, 3660, 3782, 3906, 4032, 4160, 4290, 4422, 4556, 4692, 4830, 4970, 5112, 5256, 5402, 5550, 5700, 5852, 6006, 6162, 6320, 6480, 6642, 6806, 6972, 7140, 7310, 7482, 7656, 7832, 8010, 8190, 8372, 8556, 8742, 8930, 9120, 9312, 9506, 9702, 9900, 10100, 10302, 10506, 10712, 10920, 11130, 11342, 11556, 11772, 11990, 12210, 12432, 12656, 12882, 13110, 13340, 13572, 13806, 14042, 14280, 14520, 14762, 15006, 15252, 15500, 15750, 16002, 16256, 16512, 16770, 17030, 17292, 17556, 17822, 18090, 18360, 18632, 18906, 19182, 19460, 19740, 20022, 20306, 20592, 20880, 21170, 21462, 21756, 22052, 22350, 22650, 22952, 23256, 23562, 23870, 24180, 24492, 24806, 25122, 25440, 25760, 26082, 26406,

In [7]:
from compiler import parsemodel, fpgamodule
WIDTH = 4
DEPTH = 3
onnx_model = make_multilayer(DEPTH, [WIDTH for i in range(2*DEPTH + 1)])[0]
spec = fpgamodule.FPGASpec(120, 600_000, 2_700_000, 100_000)
fpga_module = parsemodel.parse_model(onnx_model, WIDTH, spec)
fpga_module.alloc_regs()
fpga_module.alloc_bram()
sv = fpga_module.make_sv()
with open("dummy_model.sv", "w") as f:
    f.write(sv)
print("done generating")
onnx.save(onnx_model, "testmodel.onnx")

done generating


In [8]:
import onnxruntime as ort
from onnx.reference import ReferenceEvaluator

providers = ["CPUExecutionProvider"]
print(ort.get_available_providers())
options = ort.SessionOptions()
options.enable_profiling=False
sess = ReferenceEvaluator("testmodel.onnx", verbose = 4)
x_test = np.ones(WIDTH).astype(np.int8)
res = sess.run([f"Y_{DEPTH-1}"], {"X_0": x_test})
print(res)

['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']
 +C weight_0: int8:(4, 4):-83,-118,-2,22,93...
 +C bias_0: int32:(4,):[-58, -69, 30, -79]
 +C weight_1: int8:(4, 4):82,125,24,8,-113...
 +C bias_1: int32:(4,):[102, 75, 103, -67]
 +C weight_2: int8:(4, 4):29,67,-63,-127,-15...
 +C bias_2: int32:(4,):[-73, -89, 28, -85]
 +I X_0: int8:(4,):[1, 1, 1, 1]
MatMulInteger(X_0, weight_0) -> XMM32_0
 + XMM32_0: int32:(4,):[235, -296, 78, 74]
Add(XMM32_0, bias_0) -> XMMB32_0
 + XMMB32_0: int32:(4,):[177, -365, 108, -5]
Cast(XMMB32_0) -> XMMB8_0
 + XMMB8_0: int8:(4,):[-79, -109, 108, -5]
Relu(XMMB8_0) -> Y_0
 + Y_0: int8:(4,):[0, 0, 108, 0]
MatMulInteger(Y_0, weight_1) -> XMM32_1
 + XMM32_1: int32:(4,):[-12744, -6696, 432, 10152]
Add(XMM32_1, bias_1) -> XMMB32_1
 + XMMB32_1: int32:(4,):[-12642, -6621, 535, 10085]
Cast(XMMB32_1) -> XMMB8_1
 + XMMB8_1: int8:(4,):[-98, 35, 23, 101]
Relu(XMMB8_1) -> Y_1
 + Y_1: int8:(4,):[0, 35, 23, 101]
MatMulInteger(Y_1, weight_2) -> XMM3

In [9]:
for init in onnx_model.graph.initializer:
    print(onnx.numpy_helper.to_array(init).astype(np.int8))

[[ -83 -118   -2   22]
 [  93  -70  -60  -29]
 [ 126  -34   36  -42]
 [  99  -74  104  123]]
[-58 -69  30 -79]
[[  82  125   24    8]
 [-113  -16   -6   55]
 [-118  -62    4   94]
 [ -62    6 -112 -117]]
[102  75 103 -67]
[[  29   67  -63 -127]
 [ -15   32  -37  -35]
 [ -63 -110   40   38]
 [  14   83   29   26]]
[-73 -89  28 -85]
