In [1]:
val path = System.getProperty("user.dir") + "/source/load-ivy.sc"
interp.load.module(ammonite.ops.Path(java.nio.file.FileSystems.getDefault().getPath(path)))

[36mpath[39m: [32mString[39m = [32m"/workdir/source/load-ivy.sc"[39m

In [2]:
import chisel3._
import chisel3.util._
import chisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

[32mimport [39m[36mchisel3._
[39m
[32mimport [39m[36mchisel3.util._
[39m
[32mimport [39m[36mchisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}[39m

<img src="slike/welcome_slide.png" width=100%>


# Chisel
* Constructing Hardware in a Scala Embedded Language
* Chisel is not HLS.
* Type-Safe Meta-Programming for RTL in Scala:
    * Parametrized types
    * Object-oriented programming
    * Functional programming
    * Static type checking

Note: Some of the slides and material were taken from: https://github.com/freechipsproject/chisel-bootcamp

# Example

<center>
<img src="slike/FIR_diagram.png">
</center>

In [10]:
class MovingAverage3(bitWidth: Int) extends Module {
  val io = IO(new Bundle {
    val in = Input(UInt(bitWidth.W))
    val out = Output(UInt(bitWidth.W))
  })
  val z1 = RegNext(io.in) // Create a register whose input is connected to the argument io.in
  val z2 = RegNext(z1)    // Create a register whose input is connected to the argument z1
  io.out := (io.in * 1.U) + (z1 * 1.U) + (z2 * 1.U) // `1.U` is an unsigned literal with value 1
}

defined [32mclass[39m [36mMovingAverage3[39m


<center>
<img src="slike/FIR_diagram.png">
</center>

In [11]:
visualize(() => new MovingAverage3(8))

In [13]:
print(getFirrtl(new MovingAverage3(8)))
//print(getVerilog(new MovingAverage3(8)))

circuit MovingAverage3 :
  module MovingAverage3 :
    input clock : Clock
    input reset : UInt<1>
    output io : { flip in : UInt<8>, out : UInt<8>}

    reg z1 : UInt, clock with :
      reset => (UInt<1>("h0"), z1) @[cmd9.sc 6:19]
    z1 <= io.in @[cmd9.sc 6:19]
    reg z2 : UInt, clock with :
      reset => (UInt<1>("h0"), z2) @[cmd9.sc 7:19]
    z2 <= z1 @[cmd9.sc 7:19]
    node _T = mul(io.in, UInt<1>("h1")) @[cmd9.sc 8:20]
    node _T_1 = mul(z1, UInt<1>("h1")) @[cmd9.sc 8:33]
    node _T_2 = add(_T, _T_1) @[cmd9.sc 8:27]
    node _T_3 = tail(_T_2, 1) @[cmd9.sc 8:27]
    node _T_4 = mul(z2, UInt<1>("h1")) @[cmd9.sc 8:46]
    node _T_5 = add(_T_3, _T_4) @[cmd9.sc 8:40]
    node _T_6 = tail(_T_5, 1) @[cmd9.sc 8:40]
    io.out <= _T_6 @[cmd9.sc 8:10]


# FIR Generator

In [14]:
// Generalized FIR filter parameterized by the convolution coefficients
class FirFilter(bitWidth: Int, coeffs: Seq[UInt]) extends Module {
  val io = IO(new Bundle {
    val in = Input(UInt(bitWidth.W))
    val out = Output(UInt())
  })
  // Create the serial-in, parallel-out shift register
  val zs = Reg(Vec(coeffs.length, UInt(bitWidth.W)))
  zs(0) := io.in
  for (i <- 1 until coeffs.length) {
    zs(i) := zs(i-1)
  }

  // Do the multiplies
  val products = VecInit.tabulate(coeffs.length)(i => zs(i) * coeffs(i))

  // Sum up the products
  io.out := products.reduce(_ +& _)
}

defined [32mclass[39m [36mFirFilter[39m

In [18]:
// same 3-point moving average filter as before
visualize(() => new FirFilter(8, Seq(1.U, 1.U, 1.U)))

// 1-cycle delay as a FIR filter
//visualize(() => new FirFilter(8, Seq(0.U, 1.U)))

// 5-point FIR filter with a triangle impulse response
//visualize(() => new FirFilter(8, Seq(1.U, 2.U, 3.U, 2.U, 1.U)))


# How does chisel4ml use Chisel?

In [19]:
object Neuron {
    def apply[I <: Bits,
              W <: Bits,
              M <: Bits,
              A <: Bits,
              O <: Bits](in: Seq[I],
                         weights: Seq[W],
                         thresh: A,
                         mul: (I, W) => M,
                         add: Vec[M] => A,
                         actFn: (A, A) => O,
                         shift: Int): O = {
        val muls = VecInit((in zip weights).map{
            case (a,b) => mul(a,b)
        })
        val pAct = add(muls)
        val sAct = (pAct << shift.abs).asTypeOf(pAct)
        actFn(sAct, thresh)
    }
}

defined [32mobject[39m [36mNeuron[39m

In [29]:
def mulUQ(i: SInt, w: SInt): SInt = i * w    // Uniform quantization
def addUQ = (x: Vec[SInt]) => x.reduceTree(_ +& _)

def mulBW = (i: SInt, w: Bool) => Mux(w, i, -i)  // Binary weight quantization

def mulBNN(i: Bool, w: Bool): Bool = ~(i ^ w) // Binarized quantization
def addBNN = (x: Vec[Bool]) => PopCount(x.asUInt)

def reluFn(act: SInt, thresh: SInt): UInt = Mux((act - thresh) > 0.S, (act - thresh).asUInt, 0.U)
def signFn(act:UInt, thresh: UInt): Bool = act >= thresh

defined [32mfunction[39m [36mmulUQ[39m
defined [32mfunction[39m [36maddUQ[39m
defined [32mfunction[39m [36mmulBW[39m
defined [32mfunction[39m [36mmulBNN[39m
defined [32mfunction[39m [36maddBNN[39m
defined [32mfunction[39m [36mreluFn[39m
defined [32mfunction[39m [36msignFn[39m

In [34]:
class DummyUniformModule extends Module {
  val io = IO(new Bundle {
    val in = Input(Vec(3, SInt(4.W)))
    val out = Output(UInt())
  })
    io.out := Neuron[SInt, SInt, SInt, SInt, UInt](in = io.in,
                                                  weights = Seq(1.S, -2.S, 3.S),
                                                  thresh = -1.S,
                                                  mul = mulUQ,
                                                  add = addUQ,
                                                  actFn = reluFn,
                                                  shift = 1
                                                 )
}

defined [32mclass[39m [36mDummyUniformModule[39m

In [39]:
//visualize(() => new DummyUniformModule())
//print(getFirrtl(new DummyUniformModule()))
print(getVerilog(new DummyUniformModule()))

Elaborating design...
Done elaborating.
module DummyUniformModule(
  input        clock,
  input        reset,
  input  [3:0] io_in_0,
  input  [3:0] io_in_1,
  input  [3:0] io_in_2,
  output [8:0] io_out
);
  wire [5:0] _T = $signed(io_in_0) * 2'sh1; // @[cmd28.sc 1:39]
  wire [5:0] _T_1 = $signed(io_in_1) * 2'sh2; // @[cmd28.sc 1:39]
  wire [6:0] _T_2 = $signed(io_in_2) * 3'sh3; // @[cmd28.sc 1:39]
  wire [6:0] _WIRE__0 = {{1{_T[5]}},_T}; // @[cmd18.sc 13:27 cmd18.sc 13:27]
  wire [6:0] _WIRE__1 = {{1{_T_1[5]}},_T_1}; // @[cmd18.sc 13:27 cmd18.sc 13:27]
  wire [7:0] _T_3 = $signed(_WIRE__0) + $signed(_WIRE__1); // @[cmd28.sc 2:46]
  wire [7:0] _WIRE_1_1 = {{1{_T_2[6]}},_T_2}; // @[cmd28.sc 2:43 cmd28.sc 2:43]
  wire [8:0] _T_4 = $signed(_T_3) + $signed(_WIRE_1_1); // @[cmd28.sc 2:46]
  wire [9:0] _T_7 = {$signed(_T_4), 1'h0}; // @[cmd18.sc 17:48]
  wire [8:0] _WIRE_3 = _T_7[8:0]; // @[cmd18.sc 17:48 cmd18.sc 17:48]
  wire [8:0] _T_10 = $signed(_WIRE_3) - -9'sh1; // @[cmd28.sc 9:54]
 

In [31]:
class DummyBinarizedModule extends Module {
  val io = IO(new Bundle {
    val in = Input(Vec(3, Bool()))
    val out = Output(UInt())
  })
    io.out := Neuron[Bool, Bool, Bool, UInt, Bool](in = io.in,
                                                  weights = Seq(true.B, false.B, true.B),
                                                  thresh = 2.U,
                                                  mul = mulBNN,
                                                  add = addBNN,
                                                  actFn = signFn,
                                                  shift = 0
                                                 )
}

defined [32mclass[39m [36mDummyBinarizedModule[39m

In [37]:
visualize(() => new DummyBinarizedModule())
//print(getFirrtl(new DummyBinarizedModule()))
//print(getVerilog(new DummyBinarizedModule()))

## Other abstractions in Chisel4ml:
* ProcessingElement == layer
* ProcessingPipeline == model

<p align="center">
<img src="slike/ProcElementC4ml.png" width=450  height=400 align=left>
<img src="slike/ProcPipelineC4ml.png" width=450 height=400 align=right>
</p>