In [None]:
!pip install chisel4ml

In [None]:
import chisel4ml

# Chisel4ml - high-level software architecture

<img src="slike/chisel4ml_architecture.png" width=55%>

# chisel4ml - example

In [9]:
import numpy as np
import qkeras
import tensorflow as tf
from chisel4ml import optimize, generate

# Fixed integer parameters for the two dense layers of the example network.
# First dense layer: kernel of shape (3, 4) and one bias value per output.
w1 = np.array(
    [
        [1, 2, 3, 4],
        [-4, -3, -2, -1],
        [2, -1, 1, 1],
    ]
)
b1 = np.array([1, 2, 0, 1])
# Second dense layer: kernel written directly as a (4, 1) column, single bias.
w2 = np.array([[-1], [4], [-3], [-1]])
b2 = np.array([2])

# Build the example network:
#   input -> QActivation -> QDense(4) -> QActivation(relu)
#         -> QDense(1) -> QActivation(relu)
# The input shape is passed as a tuple (per-sample shape, batch dimension
# excluded), which is the documented form for tf.keras.layers.Input.
x = x_in = tf.keras.layers.Input(shape=(3,))

# Quantize the input with a 4-bit quantizer that keeps negative values.
x = qkeras.QActivation(
    qkeras.quantized_bits(bits=4, integer=3, keep_negative=True)
)(x)

# Keep a reference to each dense layer so its weights can be assigned directly,
# instead of relying on positional indices into model.layers.
dense_hidden = qkeras.QDense(
    4,
    kernel_quantizer=qkeras.quantized_bits(
        # alpha holds one value per output unit of this layer.
        bits=4, integer=3, keep_negative=True, alpha=np.array([0.5, 0.25, 1, 0.25])
    ),
)
x = dense_hidden(x)
x = qkeras.QActivation(qkeras.quantized_relu(bits=3, integer=3))(x)

dense_out = qkeras.QDense(
    1,
    kernel_quantizer=qkeras.quantized_bits(
        bits=4, integer=3, keep_negative=True, alpha=np.array([0.125])
    ),
)
x = dense_out(x)
x = qkeras.QActivation(qkeras.quantized_relu(bits=3, integer=3))(x)

model = tf.keras.Model(inputs=[x_in], outputs=[x])
model.compile()

# Load the fixed kernels and biases; the layers were built when they were
# called on their inputs above, so their weights already exist.
dense_hidden.set_weights([w1, b1])
dense_out.set_weights([w2, b2])
# Ten test vectors, one per row, each holding three input values.
# Values are written as integers and stored as float64.
data = np.array(
    [
        (0, 0, 0),
        (0, 1, 2),
        (2, 1, 0),
        (4, 4, 4),
        (7, 7, 7),
        (6, 0, 7),
        (3, 3, 3),
        (7, 0, 0),
        (0, 7, 0),
        (0, 0, 7),
    ],
    dtype=np.float64,
)


# Optimize the QKeras model, then generate a circuit from the optimized model.
opt_model = optimize.qkeras_model(model)
circuit = generate.circuit(opt_model)
try:
    # Every test vector must give identical results from the optimized software
    # model and from the generated circuit. The loop variable has its own name
    # so it does not overwrite the Keras tensor `x` used to build the model.
    for sample in data:
        # predict() is given a batch of one sample; the circuit takes the raw vector.
        sw_res = opt_model.predict(np.expand_dims(sample, axis=0))
        hw_res = circuit(sample)  # RTL simulation
        assert np.array_equal(sw_res.flatten(), hw_res.flatten()), (
            f"software/hardware mismatch for input {sample}: "
            f"sw={sw_res.flatten()}, hw={hw_res.flatten()}"
        )
finally:
    # Always release the circuit on the server, even when a comparison fails.
    circuit.delete_from_server()

INFO:root:keras dict linear
INFO:root:keras dict linear
INFO:root:keras dict linear
INFO:root:keras dict linear
INFO:chisel4ml.transforms.qkeras_transforms:Calling transformation <class 'chisel4ml.transforms.qkeras_remove_dead_layers.QKerasRemoveDeadLayers'> on layers:[<class 'keras.engine.input_layer.InputLayer'>].
INFO:chisel4ml.transforms.qkeras_transforms:Calling transformation <class 'chisel4ml.transforms.qkeras_active_qact_fuse.QKerasActiveQActFuse'> on layers:[<class 'qkeras.qlayers.QDense'>, <class 'qkeras.qlayers.QActivation'>].
INFO:chisel4ml.transforms.qkeras_transforms:Calling transformation <class 'chisel4ml.transforms.qkeras_active_qact_fuse.QKerasActiveQActFuse'> on layers:[<class 'qkeras.qlayers.QDense'>, <class 'qkeras.qlayers.QActivation'>].
INFO:chisel4ml.transforms.qkeras_transforms:Calling transformation <class 'chisel4ml.transforms.qkeras_qact_active_fuse.QKerasQActActiveFuse'> on layers:[<class 'qkeras.qlayers.QActivation'>, <class 'qkeras.qlayers.QDense'>].
INFO



INFO:chisel4ml.circuit:Succesfully deleted circuit id: 3


True

# Comparison with hls4ml
Vivado 2019.2 synthesis results for a 4-layer neural network trained on hls4ml\_lhc\_jets\_hl. For more information, see _Towards Deploying Highly Quantized Neural Networks on FPGA Using Chisel_ (DSD 2023).
<img src="slike/legenda.png" width=20% align=center>
<p align="center">
<img src="slike/FF_graph.png" width=450  height=400 align=left>
<img src="slike/LUT_graph.png" width=450 height=400 align=right>
</p>
<p align="center">
<img src="slike/freq_graph.png" width=450  height=400 align=left>
<img src="slike/delay_graph.png" width=450 height=400 align=right>
</p>

# Supported operations/layers
Currently chisel4ml supports the following layers:
* QDense (fully unrolled)
* Sparse QDense
* QDepthwiseConv2D \*
* MaxPool2D
* FFT (Custom layer)
* LMFE (Custom audio features)

# Future work
* Update from chisel 3.5.6 to chisel 6 (generation speed boost)
* Integrate with QONNX
* Classic convolution -> will probably need a unit to change the tensor layout
* Folded QDense -> a processing unit for matrix-vector multiplication
* Support for skip connections

# Ideas 

* Integration with rocketchip / dsptools -> ASIC prototyping possibility
* Integration with fpga-tidbits -> easier FPGA prototyping


In [10]:
# Inspect the lbir_model attribute of the generated circuit; as the last
# expression in the cell, its repr is shown as the cell output.
circuit.lbir_model

name: "chisel4ml_model"
layers {
  dense {
    thresh {
      dtype {
        signed: true
        bitwidth: 16
        shift: 0
        offset: 0
      }
      shape: 4
      values: -1.0
      values: -2.0
      values: -0.0
      values: -1.0
    }
    kernel {
      dtype {
        signed: true
        bitwidth: 4
        shift: -1
        shift: -2
        shift: 0
        shift: -2
        offset: 0
      }
      shape: 4
      shape: 3
      values: 1.0
      values: 2.0
      values: 3.0
      values: 4.0
      values: -4.0
      values: -3.0
      values: -2.0
      values: -1.0
      values: 2.0
      values: -1.0
      values: 1.0
      values: 1.0
    }
    input {
      dtype {
        signed: true
        bitwidth: 4
        shift: 0
        offset: 0
      }
      shape: 3
    }
    output {
      dtype {
        bitwidth: 3
        shift: 0
        offset: 0
      }
      shape: 4
    }
    activation: RELU
    rounding_mode: ROUND_HALF_TO_EVEN
  }
}
layers {
  dense {
