In [1]:
from pprint import pprint
import pytorch_quantization
from pytorch_quantization import tensor_quant
import pytorch_quantization.nn as quant_nn
import torch
import torch.nn as nn
import torch.onnx


class TestModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = quant_nn.Linear(8, 4, bias=True,
                                            quant_desc_input=tensor_quant.QUANT_DESC_8BIT_PER_TENSOR,
                                            quant_desc_weight=tensor_quant.QUANT_DESC_8BIT_LINEAR_WEIGHT_PER_ROW)
        self.relu = nn.ReLU()
    def forward(self, x):
        return self.relu(self.linear(x))
model = TestModel()
torch.onnx.export(model, torch.zeros(1, 8), "singlelayer.onnx", verbose=True, opset_version=10)

In [None]:
from onnxruntime.quantization import quantize_dynamic
quantize_dynamic(
        "singlelayer.onnx",
        "singlelayer.quantized.onnx",
    )



In [None]:
import onnx

# Load the ONNX model
onnx_model = onnx.load("hello_world_int8.onnx")
onnx_model = onnx.shape_inference.infer_shapes(onnx_model)
# Check that the model is well formed
onnx.checker.check_model(onnx_model)

# Print a human readable representation of the graph
print(onnx.helper.printable_graph(onnx_model.graph))

graph tf2onnx (
  %serving_default_dense_input:0[INT8, unk__44x1]
) initializers (
  %zero_point__41[INT8, scalar]
  %zero_point__39[INT8, scalar]
  %zero_point__29[INT8, scalar]
  %zero_point__23[INT32, scalar]
  %sequential/dense_2/MatMul[INT8, 16x1]
  %sequential/dense_2/BiasAdd/ReadVariableOp[INT32, 1]
  %sequential/dense_1/MatMul[INT8, 16x16]
  %sequential/dense_1/BiasAdd/ReadVariableOp[INT32, 16]
  %sequential/dense/MatMul[INT8, 1x16]
  %sequential/dense/BiasAdd/ReadVariableOp[INT32, 16]
  %scale__40[FLOAT, scalar]
  %scale__38[FLOAT, scalar]
  %scale__34[FLOAT, scalar]
  %scale__30[FLOAT, scalar]
  %scale__28[FLOAT, scalar]
  %scale__26[FLOAT, scalar]
  %scale__24[FLOAT, scalar]
  %scale__22[FLOAT, scalar]
  %scale__20[FLOAT, scalar]
  %scale__18[FLOAT, scalar]
) {
  %sequential/dense/BiasAdd/ReadVariableOp_dequant = DequantizeLinear(%sequential/dense/BiasAdd/ReadVariableOp, %scale__30, %zero_point__23)
  %sequential/dense/MatMul_dequant = DequantizeLinear(%sequential/dense/MatM

In [None]:
for init in onnx_model.graph.initializer:
    print(init)
for node in onnx_model.graph.node:
    print(node)

data_type: 3
name: "zero_point__41"
raw_data: "\005"

data_type: 3
name: "zero_point__39"
raw_data: "\200"

data_type: 3
name: "zero_point__29"
raw_data: "\000"

data_type: 6
name: "zero_point__23"
raw_data: "\000\000\000\000"

dims: 16
dims: 1
data_type: 3
name: "sequential/dense_2/MatMul"
raw_data: "\331;\'\025\034\340\336\335\017\033\305\327\022\335\371\177"

dims: 1
data_type: 6
name: "sequential/dense_2/BiasAdd/ReadVariableOp"
raw_data: "\255\001\000\000"

dims: 16
dims: 16
data_type: 3
name: "sequential/dense_1/MatMul"
raw_data: "\364\003\373\020\344#\021\335%\357\352\342\333\031\351\010\032\334\'\376\025\n\014\373!\034\032\037\347\036$\"\355\322\335\024\007\"\365\375\035\347\374\351\025\342\346&\t\002\353\332\013$\340\000\n\003\330\327\025\t)!\031\006\333\034\004\036\363\024\333\340\023\312\365\375\000\t!\371\344\370\033\'\007&\035\026\000\342\006\363\007\010\364\364\005\363\032\003\027\021\334\002\304\335\326\024\026\017$\002\032\361\376\346\345\027 \003\330\306\032\335\342\013

In [None]:
init_node = onnx_model.graph.node[0]
init_node.input[1]
onnx_model.graph.initializer
next(i for i in iter(onnx_model.graph.initializer) if i.name == "linear.weight").float_data

[]

In [None]:
from compiler import parsemodel, fpgamodule
spec = fpgamodule.FPGASpec(120, 1000, 10_000, 100_000)
fpga_module = parsemodel.parse_model(onnx_model, 8, 4, spec)
pprint(list(mod for mod in fpga_module.modules))

IndentationError: expected an indented block (parsemodel.py, line 34)

In [None]:
fpga_module.alloc_regs()
fpga_module.alloc_bram()
sv = fpga_module.make_sv()
with open("dummy_model.sv", "w") as f:
    f.write(sv)