# chisel4ml - high-level software architecture

<img src="slike/chisel4ml_architecture.png" align="center" width=55%>

# chisel4ml - Train a model in Brevitas (PyTorch)

In [1]:
import netron
import numpy as np
import torch
from brevitas.export import export_qonnx
from IPython.display import IFrame
from qonnx.core.modelwrapper import ModelWrapper
from qonnx.util.cleanup import cleanup_model

import chisel4ml
from chisel4ml import generate
from chisel4ml.transform import qonnx_to_lbir

from lhc_data import get_lhc_dataset
from lhc_model import get_lhc_jets_model
from server import create_server
from train import train_model, eval_model

In [2]:
# Seed torch's global RNG before the model is built so that a fresh
# "Restart Kernel -> Run All" starts from the same random state each time.
# NOTE(review): only torch is seeded here; if the lhc_model / lhc_data / train
# helpers draw from numpy or `random`, those need seeding too — confirm.
SEED = 0
torch.manual_seed(SEED)

# Build the Brevitas model (bitwidth=4) and the train/test data loaders.
brevitas_model = get_lhc_jets_model(bitwidth=4)
train_loader, test_loader = get_lhc_dataset(batch_size=512)

# Loss and optimizer are named up front so the training call stays readable.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(brevitas_model.parameters(), lr=0.001)

# Train for a single epoch on the CPU with a prune rate of 0.5.
train_model(
    model=brevitas_model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=1,
    device='cpu',
    prune_rate=0.5,
)

  return super().rename(names)


[1/1,  50/1135] - loss: 0.04024
[1/1, 100/1135] - loss: 0.04024
[1/1, 150/1135] - loss: 0.04024
[1/1, 200/1135] - loss: 0.04024
[1/1, 250/1135] - loss: 0.04024
[1/1, 300/1135] - loss: 0.04024
[1/1, 350/1135] - loss: 0.04024
[1/1, 400/1135] - loss: 0.04024
[1/1, 450/1135] - loss: 0.04024
[1/1, 500/1135] - loss: 0.04024
[1/1, 550/1135] - loss: 0.04024
[1/1, 600/1135] - loss: 0.04024
[1/1, 650/1135] - loss: 0.04024
[1/1, 700/1135] - loss: 0.04024
[1/1, 750/1135] - loss: 0.04024
[1/1, 800/1135] - loss: 0.04024
[1/1, 850/1135] - loss: 0.04024
[1/1, 900/1135] - loss: 0.04024
[1/1, 950/1135] - loss: 0.04024
[1/1, 1000/1135] - loss: 0.04024
[1/1, 1050/1135] - loss: 0.04024
[1/1, 1100/1135] - loss: 0.04024
Finished Training


# Evaluate the trained model

In [3]:
# Evaluate the model on test_loader using the CPU; the returned value is
# shown as this cell's output.
# NOTE(review): the recorded run reports ~20 % accuracy, which looks like
# chance level if the task has five classes — confirm training converges.
test_accuracy = eval_model(brevitas_model, test_loader, 'cpu')
test_accuracy

Accuracy of the network on the 487 test input batches: 20.073092369477912 %


0.20073092369477913

# Export the model to QONNX

In [4]:
# Export the Brevitas model to QONNX, passing a random tensor shaped like
# brevitas_model.ishape as the example input.
dummy_input = torch.randn(brevitas_model.ishape)
qonnx_proto = export_qonnx(brevitas_model, dummy_input)

# Wrap the exported proto in a ModelWrapper and run the QONNX cleanup pass.
qonnx_model = cleanup_model(ModelWrapper(qonnx_proto))

In [5]:
# Save the cleaned QONNX model to disk so Netron can serve it.
# `netron` and `IFrame` are imported in the notebook's top import cell.
qonnx_model.save('model.onnx')

addr = 'localhost'
port = 5555

# Start the Netron server for the saved file at addr:port; browse=False is
# passed because the viewer is embedded in the notebook via the IFrame below.
netron.start('model.onnx', (addr, port), browse=False)

# Last expression of the cell: renders the Netron page inline.
IFrame(f'http://{addr}:{port}', width=1200, height=600)

Serving 'model.onnx' at http://localhost:5555


# Create a chisel4ml circuit

In [6]:
# `qonnx_to_lbir` and `generate` are imported in the notebook's top import cell.

# Convert the QONNX model into chisel4ml's LBIR model.
lbir_model = qonnx_to_lbir(qonnx_model)

# Select accelerators for the LBIR model, minimizing delay.
accelerators = generate.accelerators(
    lbir_model,
    minimize="delay",
)

# Start a chisel4ml server from the jar file.
# NOTE(review): c4ml_subp is never stopped in the visible cells — confirm
# whether the server subprocess needs explicit shutdown at the end.
c4ml_server, c4ml_subp = create_server("/c4ml/chisel4ml.jar")

# Generate the circuit on that server with the Verilator backend and a
# generation timeout of 9000 seconds.
circuit = generate.circuit(
    accelerators,
    lbir_model,
    use_verilator=True,
    gen_timeout_sec=9000,
    server=c4ml_server,
)

[main] INFO chisel4ml.Chisel4mlServer - Started a new chisel4ml-server on port 54933, using temporary directory: /tmp/chisel4mlezuq7q9t.
[grpc-default-executor-1] INFO chisel4ml.Chisel4mlServer - Started generating hardware for circuit id:0 in temporary directory /tmp/chisel4mlezuq7q9t with a timeout of 9000 seconds.
[Thread-1] INFO chisel4ml.Circuit - Used annotations for generated circuit are: List(TargetDirAnnotation(../tmp/chisel4mlezuq7q9t/circuit0), NoCircuitDedupAnnotation, VerilatorBackendAnnotation).
[Thread-1] INFO class chisel4ml.ProcessingElementCombToSeq - Generated new class chisel4ml.ProcessingElementCombToSeq module.
LayerWrapSeqField$->Vector(DenseConfig(QTensor(Datatype(UNIFORM,true,4,Vector(0),Vector(0),UnknownFieldSet(Map())),Vector(1, 64),Vector(-0.0, 0.0, 0.0, -0.0, 0.0, 0.0, -0.0, -0.0, 0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, 0.0, -0.0, -0.0, 0.0, -0.0, -0.0, 0.0, -0.0, 0.0, -0.0, 0.0, -0.0, -0.0, -0.0, -0.0, -0.0, 0.0, 0.0, -0.0, 0.0, 0.0, -0.0, -0.0, 

In [7]:
# `np` is imported in the notebook's top import cell.
# Simulate the generated circuit on a single all-zero input of length 16
# and print the returned output vector.
res = circuit(np.zeros(16))
print(res)

[0. 0. 0. 0. 0.]


[grpc-default-executor-1] INFO chisel4ml.Chisel4mlServer - Simulating circuit id: 0 circuit on 1 input/s.
[Thread-3] INFO chisel4ml.Circuit - Simulating a sequential circuit on a new input. Input shape: Vector(16), input dtype: Datatype(UNIFORM,true,8,Vector(0),Vector(0),UnknownFieldSet(Map())), output stencil: QTensor(Datatype(UNIFORM,true,4,Vector(0),Vector(0),UnknownFieldSet(Map())),Vector(5),Vector(),ROUND,,UnknownFieldSet(Map())).


# Comparison with hls4ml


<img src="slike/experiment.png" width=80% align=center>

Vivado 2023.1 synthesis results for a 4-layer neural network trained on the hls4ml\_lhc\_jets\_hl dataset. For more information, see _Generating Direct Logic Circuit Implementations of Deeply Quantized Neural Networks Using Chisel4ml_.

# Convolutional neural networks with different quantization bitwidths - hls4ml vs. chisel4ml
<p align="center">
    <img src="slike/lut_plot.png" width=400  height=400 align=left>
    <img src="slike/delay_plot.png" width=400  height=400 align=center>
    <img src="slike/syn_time_plot.png" width=400 height=400 align=right>
</p>

# Thank you for your attention