# nPE: A Configurable Processing Engine
#### Verification | Version 0.5.1 | Updated 2018.7.26
___

## Setup

In [1]:
// Absolute location of the bootstrap script, resolved against the directory the kernel runs in.
val path = s"${System.getProperty("user.dir")}/source/load-ivy.sc"
// Load that script as an Ammonite module.
// NOTE(review): presumably this is what makes Chisel (and helpers such as getVerilog) available
// to the cells below -- confirm against load-ivy.sc.
interp.load.module(ammonite.ops.Path(java.nio.file.Paths.get(path)))

[36mpath[39m: [32mString[39m = [32m"""
C:\Users\RyanL\OneDrive\Research\SEAL\processing-engine/source/load-ivy.sc
"""[39m

In [2]:
import chisel3._
import chisel3.util._
import chisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

import scala.math.pow

[32mimport [39m[36mchisel3._
[39m
[32mimport [39m[36mchisel3.util._
[39m
[32mimport [39m[36mchisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

[39m
[32mimport [39m[36mscala.math.pow[39m

## Parallel Register File

### Single Register File

#### Definition

In [3]:
/** Static parameters of a single register file ([[RF]]).
  *
  * Parameters are validated eagerly so that an unusable configuration is rejected here,
  * with a clear message, instead of failing later during hardware elaboration.
  *
  * @param addrWidth width in bits of the read/write addresses; [[RF]] allocates
  *                  2^addrWidth registers
  * @param dataWidth width in bits of every stored (signed) word
  * @param numInputs number of candidate write-data inputs; an input-select vector is only
  *                  generated when this is greater than one
  * @param bpSupport whether a bypass-select control signal is generated
  * @throws IllegalArgumentException if a width or the input count is out of range
  */
class RFConfig(
        val addrWidth: Int,
        val dataWidth: Int,
        val numInputs: Int,
        val bpSupport: Boolean) {

    require(addrWidth >= 0, "Address width must be non-negative.")
    require(dataWidth >= 1, "Data width must be at least one.")
    require(numInputs >= 1, "Must have at least one input.")
}

/** Control signals of one register file, sized from an [[RFConfig]].
  *
  * `bpSel` and `inSel` are optional: they only exist when the configuration asks for
  * bypass support or for more than one data input, respectively.
  */
class RFControl(c: RFConfig) extends Bundle {
    
    // Lets Chisel build fresh copies of this parameterized bundle.
    override def cloneType = (new RFControl(c)).asInstanceOf[this.type]
    
    val wEnable = Bool()                  // write enable
    val rEnable = Bool()                  // read enable
    val wAddr = UInt(c.addrWidth.W)       // write address
    val rAddrInt = UInt(c.addrWidth.W)    // read address for the "int" output
    val rAddrExt = UInt(c.addrWidth.W)    // read address for the "ext" output
    // Bypass select; present only when bypass is supported.
    val bpSel = if (c.bpSupport) Some(Bool()) else None
    // One select bit per data input; present only when there is more than one input.
    val inSel = if (c.numInputs > 1) Some(Vec(c.numInputs, Bool())) else None
}

/** Data inputs of one register file: `numInputs` signed words of `dataWidth` bits each. */
class RFInput(c: RFConfig) extends Bundle {
    
    // Lets Chisel build fresh copies of this parameterized bundle.
    override def cloneType = (new RFInput(c)).asInstanceOf[this.type] 
    
    val data = Vec(c.numInputs, SInt(c.dataWidth.W))
}

/** Data outputs of one register file: two signed words of `dataWidth` bits, `int` and `ext`. */
class RFOutput(c: RFConfig) extends Bundle {
    
    // Lets Chisel build fresh copies of this parameterized bundle.
    override def cloneType = (new RFOutput(c)).asInstanceOf[this.type]
    
    val int = SInt(c.dataWidth.W)   // value addressed by rAddrInt (or the bypassed input)
    val ext = SInt(c.dataWidth.W)   // value addressed by rAddrExt (or the bypassed input)
}

/** A register file with one write port and two read ports (`int` and `ext`).
  *
  * - Write: when `wEnable` is set, the selected input word is stored at `wAddr`.
  * - Read: when `rEnable` is set, each output shows the register at its own read address,
  *   unless the optional bypass select is asserted, in which case both outputs show the
  *   selected input word directly. When `rEnable` is clear both outputs are zero.
  *
  * The input word is chosen with a priority mux over `inSel`; configurations with a single
  * input have no `inSel` and always use input 0.
  */
class RF(c: RFConfig) extends Module {

    val io = IO(new Bundle {
        val control = Input(new RFControl(c))
        val in = Input(new RFInput(c))
        val out = Output(new RFOutput(c))
    })

    // 2^addrWidth signed registers, all reset to zero.
    val zeros = Vec(Seq.fill(pow(2, c.addrWidth).toInt) { 0.S(c.dataWidth.W) })
    val registers = RegInit(zeros)

    // Fall back to a constant-true select when the bundle carries no inSel.
    val sel = io.control.inSel.getOrElse(Vec(true.B))

    // Outputs are zero by default; the read logic further down overrides them when reading.
    io.out.int := 0.S
    io.out.ext := 0.S

    when (io.control.wEnable) {
        registers(io.control.wAddr) := PriorityMux(sel, io.in.data)
    }

    when (io.control.rEnable) {
        when (io.control.bpSel.getOrElse(false.B)) {
            // Bypass: forward the selected input to both read ports.
            Seq(io.out.int, io.out.ext).foreach { port =>
                port := PriorityMux(sel, io.in.data)
            }
        } .otherwise {
            io.out.int := registers(io.control.rAddrInt)
            io.out.ext := registers(io.control.rAddrExt)
        }
    }
}

defined [32mclass[39m [36mRFConfig[39m
defined [32mclass[39m [36mRFControl[39m
defined [32mclass[39m [36mRFInput[39m
defined [32mclass[39m [36mRFOutput[39m
defined [32mclass[39m [36mRF[39m

#### Verification: Single Input

In [4]:
// Single-input register file: 4-bit addresses, 8-bit data, one input, bypass supported.
val exampleRFConOne = new RFConfig(4, 8, 1, true)

// Writes 1, 2 and 3 to addresses 1, 2 and 3, reads them back on both ports, then checks
// that asserting bpSel forwards the input and that de-asserting it restores register reads.
Driver(() => new RF(exampleRFConOne)) {
    uut => new PeekPokeTester(uut) {
         
        poke(uut.io.control.bpSel.get, false)
        poke(uut.io.control.wEnable, true)   
        poke(uut.io.control.rEnable, true)

        // Write 1 -> address 1
        poke(uut.io.control.wAddr, 1)
        poke(uut.io.in.data(0), 1)
        
        step(1)
        
        poke(uut.io.control.rAddrInt, 1)
        expect(uut.io.out.int, 1)
        
        poke(uut.io.control.rAddrExt, 1)
        expect(uut.io.out.ext, 1)
        
        // Write 2 -> address 2
        poke(uut.io.control.wAddr, 2)
        poke(uut.io.in.data(0), 2)
        
        step(1)
        
        poke(uut.io.control.rAddrInt, 1)
        expect(uut.io.out.int, 1)
        
        poke(uut.io.control.rAddrExt, 2)
        expect(uut.io.out.ext, 2)
        
        // Write 3 -> address 3
        poke(uut.io.control.wAddr, 3)
        poke(uut.io.in.data(0), 3)
        
        step(1)
        
        poke(uut.io.control.rAddrInt, 1)
        expect(uut.io.out.int, 1)
        
        poke(uut.io.control.rAddrExt, 2)
        expect(uut.io.out.ext, 2)
        
        // RFOutput exposes `int` and `ext` directly; there is no intermediate `data` field.
        poke(uut.io.control.rAddrInt, 3)
        expect(uut.io.out.int, 3)
        
        step(1)
        
        // Bypass: both outputs follow the input instead of the registers.
        poke(uut.io.control.bpSel.get, true)
        poke(uut.io.in.data(0), 10)
        expect(uut.io.out.int, 10)
        expect(uut.io.out.ext, 10)
        
        // Bypass off: outputs return to the addressed registers (int -> addr 3, ext -> addr 2).
        poke(uut.io.control.bpSel.get, false)
        expect(uut.io.out.int, 3)
        expect(uut.io.out.ext, 2)
        
    }
}

[[35minfo[0m] [0.001] Elaborating design...
[[35minfo[0m] [0.123] Done elaborating.
Total FIRRTL Compile Time: 483.6 ms
Total FIRRTL Compile Time: 104.4 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.001] SEED 1532742491574
test cmd2WrapperHelperRF Success: 11 tests passed in 9 cycles taking 0.063285 seconds
[[35minfo[0m] [0.040] RAN 4 CYCLES PASSED


[36mexampleRFConOne[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@70733d1f
[36mres3_1[39m: [32mBoolean[39m = [32mtrue[39m

#### Verification: Multiple Inputs

In [5]:
// Two-input register file: 4-bit addresses, 8-bit data, two inputs, bypass supported.
val exampleRFConTwo = new RFConfig(4, 8, 2, true)

// Alternates the write source between the two inputs, reads the stored values back,
// then checks the bypass path for each input and the return to register reads.
Driver(() => new RF(exampleRFConTwo)) {
    uut => new PeekPokeTester(uut) {

        // Drive the input-select vector one-hot, with only `active` set.
        def selectInput(active: Int): Unit =
            for (i <- 0 until 2) poke(uut.io.control.inSel.get(i), if (i == active) 1 else 0)

        // Drive both data inputs.
        def driveInputs(first: Int, second: Int): Unit = {
            poke(uut.io.in.data(0), first)
            poke(uut.io.in.data(1), second)
        }

        // Point a read port at `addr` and check the value it shows.
        def readInt(addr: Int, expected: Int): Unit = {
            poke(uut.io.control.rAddrInt, addr)
            expect(uut.io.out.int, expected)
        }

        def readExt(addr: Int, expected: Int): Unit = {
            poke(uut.io.control.rAddrExt, addr)
            expect(uut.io.out.ext, expected)
        }

        poke(uut.io.control.bpSel.get, false)
        poke(uut.io.control.wEnable, true)
        poke(uut.io.control.rEnable, true)

        // Write address 1 from input(0)
        poke(uut.io.control.wAddr, 1)
        selectInput(0)
        driveInputs(1, 10)
        step(1)

        readInt(1, 1)
        readExt(1, 1)

        // Write address 2 from input(1)
        poke(uut.io.control.wAddr, 2)
        selectInput(1)
        driveInputs(2, 20)
        step(1)

        readInt(1, 1)
        readExt(2, 20)

        // Write address 3 from input(0)
        poke(uut.io.control.wAddr, 3)
        selectInput(0)
        driveInputs(3, 30)
        step(1)

        readInt(1, 1)
        readExt(2, 20)
        readInt(3, 3)

        step(1)

        // Bypass: outputs follow whichever input is selected.
        poke(uut.io.control.bpSel.get, true)

        selectInput(0)
        driveInputs(5, 7)
        expect(uut.io.out.int, 5)
        expect(uut.io.out.ext, 5)

        selectInput(1)
        driveInputs(5, 7)
        expect(uut.io.out.int, 7)
        expect(uut.io.out.ext, 7)

        // Bypass off: back to the addressed registers.
        poke(uut.io.control.bpSel.get, false)
        expect(uut.io.out.int, 3)
        expect(uut.io.out.ext, 20)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.014] Done elaborating.
Total FIRRTL Compile Time: 80.5 ms
Total FIRRTL Compile Time: 63.7 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532742496273
test cmd2WrapperHelperRF Success: 13 tests passed in 9 cycles taking 0.053391 seconds
[[35minfo[0m] [0.044] RAN 4 CYCLES PASSED


[36mexampleRFConTwo[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@316c559a
[36mres4_1[39m: [32mBoolean[39m = [32mtrue[39m

### Putting them Together

#### Definition

In [6]:
/** Parameters of a parallel register file ([[PRF]]): `ports` identical register files.
  *
  * @param ports     number of register files instantiated side by side
  * @param dataWidth width in bits of each stored word
  * @param addrWidth width in bits of the addresses
  * @param numInputs number of data inputs per register file
  * @param bpType    bypass flavour; must be one of "None", "Soft" or "Hard"
  * @throws IllegalArgumentException if `bpType` is not one of the three supported values
  */
class PRFConfig(
        val ports: Int,
        val dataWidth: Int,
        val addrWidth: Int,
        val numInputs: Int,
        val bpType: String) {

    require(Seq("None", "Soft", "Hard").exists(_ == bpType))

    // Exactly one of these is true once the check above has passed.
    val bpNone = bpType == "None"
    val bpSoft = bpType == "Soft"
    val bpHard = bpType == "Hard"

    // Per-port configuration; only the "Soft" flavour gives the register files a bypass select.
    val rfConfig = new RFConfig(
        addrWidth = addrWidth,
        dataWidth = dataWidth,
        numInputs = numInputs,
        bpSupport = bpSoft)
}

/** Control signals of a parallel register file: one [[RFControl]] per port. */
class PRFControl(c: PRFConfig) extends Bundle {
    
    // Lets Chisel build fresh copies of this parameterized bundle.
    override def cloneType = (new PRFControl(c)).asInstanceOf[this.type]
    
    val rf = Vec(c.ports, new RFControl(c.rfConfig))
}

/** Data inputs of a parallel register file: one [[RFInput]] per port. */
class PRFInput(c: PRFConfig) extends Bundle {
    
    // Lets Chisel build fresh copies of this parameterized bundle.
    override def cloneType = (new PRFInput(c)).asInstanceOf[this.type]
    
    val rf = Vec(c.ports, new RFInput(c.rfConfig))
}

/** Data outputs of a parallel register file: one [[RFOutput]] per port. */
class PRFOutput(c: PRFConfig) extends Bundle {
    
    // Lets Chisel build fresh copies of this parameterized bundle.
    override def cloneType = (new PRFOutput(c)).asInstanceOf[this.type]
    
    val rf = Vec(c.ports, new RFOutput(c.rfConfig))
}

/** Parallel register file: `c.ports` independent register-file lanes behind vector-shaped IO.
  *
  * - "None" / "Soft" bypass: one [[RF]] is instantiated per port and wired straight to the
  *   matching slice of the control, input and output vectors.
  * - "Hard" bypass: no storage is generated; each port's `int` and `ext` outputs are driven
  *   directly from that port's selected data input.
  */
class PRF(c: PRFConfig) extends Module {
    
    val io = IO(new Bundle {
        val control = Input(new PRFControl(c))
        val in = Input(new PRFInput(c))
        val out = Output(new PRFOutput(c))
    })
    
    if (c.bpNone || c.bpSoft) {
        
        val rf = Seq.fill(c.ports){ Module(new RF(c.rfConfig)) }
        
        // Pure wiring, so iterate for the side effect only.
        rf.zipWithIndex.foreach {
            case (x, i) =>
                x.io.control <> io.control.rf(i)
                x.io.in <> io.in.rf(i)
                io.out.rf(i) <> x.io.out
        }
        
    } else if (c.bpHard) {
        for (i <- 0 until c.ports) {
            // `data` is a Vec of inputs while the outputs are single words, so one input has
            // to be picked. Use the same rule as RF's bypass path: priority mux over inSel,
            // falling back to input 0 when the bundle has no inSel (single-input config).
            val sel = io.control.rf(i).inSel.getOrElse(Vec(true.B))
            val selected = PriorityMux(sel, io.in.rf(i).data)
            io.out.rf(i).int := selected
            io.out.rf(i).ext := selected
        }
    }
}

defined [32mclass[39m [36mPRFConfig[39m
defined [32mclass[39m [36mPRFControl[39m
defined [32mclass[39m [36mPRFInput[39m
defined [32mclass[39m [36mPRFOutput[39m
defined [32mclass[39m [36mPRF[39m

#### Verification

In [39]:
// Two-port parallel register file: 8-bit data, 4-bit addresses, two inputs per port, soft bypass.
val examplePRFCon = new PRFConfig(2, 8, 4, 2, "Soft")

// NOTE(review): every poke/expect below is commented out, so this cell only checks that the
// design elaborates -- it does not exercise any behaviour. The disabled stimulus mirrors the
// single-RF test applied to both ports; it never drives data(1) even though the configuration
// has two inputs. Confirm why it was disabled before re-enabling it.
Driver(() => new PRF(examplePRFCon)) {
    uut => new PeekPokeTester(uut) {
         
//         poke(uut.io.control.rf(0).wEnable, true)  
//         poke(uut.io.control.rf(1).wEnable, true) 
//         poke(uut.io.control.rf(0).rEnable, true)
//         poke(uut.io.control.rf(1).rEnable, true)
//         poke(uut.io.control.rf(0).bpSel.get, false)
//         poke(uut.io.control.rf(1).bpSel.get, false)
//         poke(uut.io.control.rf(0).inSel.get(0), true)
//         poke(uut.io.control.rf(1).inSel.get(0), true)
//         poke(uut.io.control.rf(0).inSel.get(1), false)
//         poke(uut.io.control.rf(1).inSel.get(1), false)

//         poke(uut.io.control.rf(0).wAddr, 1)
//         poke(uut.io.control.rf(1).wAddr, 1)
//         poke(uut.io.in.rf(0).data(0), 1)
//         poke(uut.io.in.rf(1).data(0), 1)
        
//         step(1)
        
//         // Read
//         poke(uut.io.control.rf(0).rAddrInt, 1)
//         poke(uut.io.control.rf(1).rAddrInt, 1)
//         expect(uut.io.out.rf(0).int, 1)
//         expect(uut.io.out.rf(1).int, 1)
        
//         poke(uut.io.control.rf(0).rAddrExt, 1)
//         poke(uut.io.control.rf(1).rAddrExt, 1)
//         expect(uut.io.out.rf(0).ext, 1)
//         expect(uut.io.out.rf(1).ext, 1)
        
//         // Write
//         poke(uut.io.control.rf(0).wAddr, 2)
//         poke(uut.io.control.rf(1).wAddr, 2)
//         poke(uut.io.in.rf(0).data(0), 2)
//         poke(uut.io.in.rf(1).data(0), 2)
        
//         step(1)
        
//         // Read
//         poke(uut.io.control.rf(0).rAddrInt, 1)
//         poke(uut.io.control.rf(1).rAddrInt, 1)
//         expect(uut.io.out.rf(0).int, 1)
//         expect(uut.io.out.rf(1).int, 1)
        
//         poke(uut.io.control.rf(0).rAddrExt, 2)
//         poke(uut.io.control.rf(1).rAddrExt, 2)
//         expect(uut.io.out.rf(0).ext, 2)
//         expect(uut.io.out.rf(1).ext, 2)
        
//         // Write
//         poke(uut.io.control.rf(0).wAddr, 3)
//         poke(uut.io.control.rf(1).wAddr, 3)
//         poke(uut.io.in.rf(0).data(0), 3)
//         poke(uut.io.in.rf(1).data(0), 3)
        
//         step(1)
        
//         // Read
//         poke(uut.io.control.rf(0).rAddrInt, 1)
//         poke(uut.io.control.rf(1).rAddrInt, 1)
//         expect(uut.io.out.rf(0).int, 1)
//         expect(uut.io.out.rf(1).int, 1)
        
//         poke(uut.io.control.rf(0).rAddrExt, 2)
//         poke(uut.io.control.rf(1).rAddrExt, 2)
//         expect(uut.io.out.rf(0).ext, 2)
//         expect(uut.io.out.rf(1).ext, 2)
        
//         poke(uut.io.control.rf(0).rAddrInt, 3)
//         poke(uut.io.control.rf(1).rAddrInt, 3)
//         expect(uut.io.out.rf(0).int, 3)
//         expect(uut.io.out.rf(1).int, 3)
        
//         // Bypass
//         poke(uut.io.control.rf(0).bpSel.get, true)
//         poke(uut.io.control.rf(1).bpSel.get, false)
//         poke(uut.io.in.rf(0).data(0), 10)
//         poke(uut.io.in.rf(1).data(0), 10)
//         expect(uut.io.out.rf(0).int, 10)
//         expect(uut.io.out.rf(1).int, 3)
//         expect(uut.io.out.rf(0).ext, 10)
//         expect(uut.io.out.rf(1).ext, 2)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.009] Done elaborating.
Total FIRRTL Compile Time: 38.0 ms
Total FIRRTL Compile Time: 35.4 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532743871741
test cmd5WrapperHelperPRF Success: 0 tests passed in 5 cycles taking 0.002343 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED


[36mexamplePRFCon[39m: [32mPRFConfig[39m = $sess.cmd5Wrapper$Helper$PRFConfig@8e04820
[36mres38_1[39m: [32mBoolean[39m = [32mtrue[39m

## Inner Product Unit

### Parallel Multiplier

#### Definition

In [8]:
/** Parameters of the parallel multiplier ([[PMult]]).
  *
  * @param numPairs number of (weight, activation) pairs multiplied in parallel; at least one
  * @param bitWidth width in bits of every operand and product; at least one
  * @throws IllegalArgumentException if either value is smaller than one
  */
class PMultConfig(val numPairs: Int, val bitWidth: Int) {
    // Checked in declaration order, so the first offending parameter is the one reported.
    Seq(
        numPairs -> "Must have at least one pair of multiplicands.",
        bitWidth -> "Bitwidth must be at least one."
    ).foreach { case (value, message) => require(value >= 1, message) }
}

/** Operands of the parallel multiplier: `numPairs` weights and `numPairs` activations,
  * each a signed word of `bitWidth` bits. Element i of one vector is paired with element i
  * of the other.
  */
class PMultInput(c: PMultConfig) extends Bundle {
    
    // Lets Chisel build fresh copies of this parameterized bundle.
    override def cloneType = (new PMultInput(c)).asInstanceOf[this.type]
    
    val weight = Vec(c.numPairs, SInt(c.bitWidth.W))
    val actvtn = Vec(c.numPairs, SInt(c.bitWidth.W))
}

/** Parallel multiplier: `prod(i) = weight(i) * actvtn(i)` for every pair, combinationally.
  *
  * NOTE(review): `prod` is declared `bitWidth` bits wide, the same as the operands, so a
  * product that needs more bits is presumably truncated on connection -- confirm this is
  * the intended overflow behaviour.
  */
class PMult(c: PMultConfig) extends Module {

    val io = IO(new Bundle {
        val in = Input(new PMultInput(c))
        val prod = Output(Vec(c.numPairs, SInt(c.bitWidth.W)))
    })

    // One multiplier per lane.
    for (lane <- 0 until c.numPairs) {
        io.prod(lane) := io.in.weight(lane) * io.in.actvtn(lane)
    }
}

defined [32mclass[39m [36mPMultConfig[39m
defined [32mclass[39m [36mPMultInput[39m
defined [32mclass[39m [36mPMult[39m

#### Verification

In [9]:
// Four 8-bit multiplier lanes.
val examplePMultCon = new PMultConfig(numPairs = 4, bitWidth = 8)

// Drives one (weight, activation) pair per lane and checks every product.
Driver(() => new PMult(examplePMultCon)) {
    uut => new PeekPokeTester(uut) {
        // (weight, activation) for lanes 0..3
        val operandPairs = Seq((1, 2), (3, 4), (5, 6), (7, 8))

        for (((weight, actvtn), lane) <- operandPairs.zipWithIndex) {
            poke(uut.io.in.weight(lane), weight)
            poke(uut.io.in.actvtn(lane), actvtn)
        }

        // Expected products: 2, 12, 30, 56
        for (((weight, actvtn), lane) <- operandPairs.zipWithIndex) {
            expect(uut.io.prod(lane), weight * actvtn)
        }
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.011] Done elaborating.
Total FIRRTL Compile Time: 18.9 ms
Total FIRRTL Compile Time: 16.1 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532742507972
test cmd7WrapperHelperPMult Success: 4 tests passed in 5 cycles taking 0.006922 seconds
[[35minfo[0m] [0.004] RAN 0 CYCLES PASSED


[36mexamplePMultCon[39m: [32mPMultConfig[39m = $sess.cmd7Wrapper$Helper$PMultConfig@6eb0f2c
[36mres8_1[39m: [32mBoolean[39m = [32mtrue[39m

### Additive Reduction Tree

#### Definition

In [10]:
/** Parameters of the additive reduction tree ([[ARTree]]).
  *
  * @param numAddends number of values summed; at least one
  * @param bitWidth   width in bits of every addend and of the sum; at least one
  * @throws IllegalArgumentException if either value is smaller than one
  */
class ARTreeConfig(val numAddends: Int, val bitWidth: Int) {

    private def atLeastOne(value: Int, message: String): Unit = require(value >= 1, message)

    atLeastOne(numAddends, "Number of addends must be at least one.")
    atLeastOne(bitWidth, "Bitwidth must be at least one.")
}

/** Reduces a list by repeatedly combining adjacent pairs, producing a balanced tree of `op`
  * applications rather than a linear chain.
  *
  * Each pass combines elements (0,1), (2,3), ...; an unpaired last element is carried to the
  * next pass unchanged. Passes repeat until a single value remains.
  *
  * @param xs the values to combine, in order
  * @param op the binary combining operation
  * @return the single combined value
  * @throws IllegalArgumentException if `xs` is empty
  */
def adjReduce[A](xs: List[A], op: (A, A) => A): A = xs match {
    case Nil => throw new IllegalArgumentException
    case only :: Nil => only
    case _ =>
        val nextLevel = xs.grouped(2).map {
            case List(left, right) => op(left, right)
            case List(leftover) => leftover
        }.toList
        adjReduce(nextLevel, op)
}

/** Additive reduction tree: sums `numAddends` signed inputs with a balanced tree of adders
  * built by `adjReduce`. Inputs and the sum are all `bitWidth` bits wide.
  */
class ARTree(c: ARTreeConfig) extends Module {

    val io = IO(new Bundle {
        val in  = Input(Vec(c.numAddends, SInt(c.bitWidth.W)))
        val sum = Output(SInt(c.bitWidth.W))
    })

    // Pairwise reduction keeps the adder depth logarithmic in the number of addends.
    io.sum := adjReduce[SInt](io.in.toList, _ + _)
}

defined [32mclass[39m [36mARTreeConfig[39m
defined [32mfunction[39m [36madjReduce[39m
defined [32mclass[39m [36mARTree[39m

#### Verilog

In [11]:
// Four 8-bit addends; print the generated Verilog so the adder tree can be inspected.
val exampleARTreeCon = new ARTreeConfig(numAddends = 4, bitWidth = 8)
println(getVerilog(new ARTree(exampleARTreeCon)))

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.011] Done elaborating.
Total FIRRTL Compile Time: 116.4 ms

module cmd9WrapperHelperARTree( // @[:@3.2]
  input        clock, // @[:@4.4]
  input        reset, // @[:@5.4]
  input  [7:0] io_in_0, // @[:@6.4]
  input  [7:0] io_in_1, // @[:@6.4]
  input  [7:0] io_in_2, // @[:@6.4]
  input  [7:0] io_in_3, // @[:@6.4]
  output [7:0] io_sum // @[:@6.4]
);
  wire [8:0] _T_12; // @[cmd9.sc 29:63:@8.4]
  wire [7:0] _T_13; // @[cmd9.sc 29:63:@9.4]
  wire [7:0] _T_14; // @[cmd9.sc 29:63:@10.4]
  wire [8:0] _T_15; // @[cmd9.sc 29:63:@11.4]
  wire [7:0] _T_16; // @[cmd9.sc 29:63:@12.4]
  wire [7:0] _T_17; // @[cmd9.sc 29:63:@13.4]
  wire [8:0] _T_18; // @[cmd9.sc 29:63:@14.4]
  wire [7:0] _T_19; // @[cmd9.sc 29:63:@15.4]
  wire [7:0] _T_20; // @[cmd9.sc 29:63:@16.4]
  assign _T_12 = $signed(io_in_0) + $signed(io_in_1); // @[cmd9.sc 29:63:@8.4]
  assign _T_13 = _T_12[7:0]; // @[cmd9.sc 29:63:@9.4]
  assign _T_14 = $signed(_T_13); // @

[36mexampleARTreeCon[39m: [32mARTreeConfig[39m = $sess.cmd9Wrapper$Helper$ARTreeConfig@194208e9

#### Verification

In [12]:
// Applies the same four addends twice and checks the sum each time.
Driver(() => new ARTree(exampleARTreeCon)) {
    uut => new PeekPokeTester(uut) {
        val addendValues = Seq(1, 2, 8, 9)

        for (_ <- 0 until 2) {
            for ((value, i) <- addendValues.zipWithIndex) {
                poke(uut.io.in(i), value)
            }
            // 1 + 2 + 8 + 9
            expect(uut.io.sum, addendValues.sum)
        }
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.000] Done elaborating.
Total FIRRTL Compile Time: 12.1 ms
Total FIRRTL Compile Time: 10.8 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532742518500
test cmd9WrapperHelperARTree Success: 2 tests passed in 5 cycles taking 0.003123 seconds
[[35minfo[0m] [0.002] RAN 0 CYCLES PASSED


[36mres11[39m: [32mBoolean[39m = [32mtrue[39m

### Putting them Together

#### Definition

In [13]:
/** Parameters of the inner product unit ([[IPU]]).
  *
  * @param width    number of (weight, activation) pairs; at least one
  * @param bitWidth width in bits of the data words; at least one, because the child
  *                 multiplier and adder-tree configurations built here require it
  * @param bpType   bypass flavour; must be "None" or "Firm"
  * @throws IllegalArgumentException if any parameter is out of range
  */
class IPUConfig(val width: Int, val bitWidth: Int, val bpType: String) {
    
    private val bypssError = "Bypass must be \"None\" or \"Firm\""
    private val widthError = "Width must be at least one"
    private val bitWdError = "Data bitwidth must be at least one"
    
    val supportedBp = List("None", "Firm")
    
    require(width >= 1, widthError)
    require(supportedBp.contains(bpType), bypssError)
    // Same lower bound the child configurations enforce, so a zero width is reported here
    // with this class's own message instead of surfacing from a nested constructor.
    require(bitWidth >= 1, bitWdError)
    
    val childPMultConfig = new PMultConfig(width, bitWidth)
    val childARTreeConfig = new ARTreeConfig(width, bitWidth)
    
    // True when the unit exposes the bypass outputs and the bypass select input.
    val bpFirm = (bpType == "Firm")
}

/** Outputs of the inner product unit.
  *
  * `bpWeight` and `bpActvtn` only exist when the configuration selects "Firm" bypass.
  */
class IPUOutput(c: IPUConfig) extends Bundle {
    
    // Lets Chisel build fresh copies of this parameterized bundle.
    override def cloneType = (new IPUOutput(c)).asInstanceOf[this.type]
    
    // NOTE(review): only this field carries an explicit Output direction; the optional
    // fields below do not -- confirm the asymmetry is intentional.
    val innerProd = Output(SInt(c.bitWidth.W))
    val bpWeight = if (c.bpFirm) Some(SInt(c.bitWidth.W)) else None
    val bpActvtn = if (c.bpFirm) Some(SInt(c.bitWidth.W)) else None
}


/** Inner product unit: multiplies weights by activations lane-wise ([[PMult]]) and sums the
  * products ([[ARTree]]) into `dataOut.innerProd`.
  *
  * With "Firm" bypass the unit also forwards one raw weight and one raw activation, chosen
  * by a priority mux over `bpSel`, to `dataOut.bpWeight` / `dataOut.bpActvtn`.
  */
class IPU(c: IPUConfig) extends Module {

    val cPMConfig = c.childPMultConfig
    val cARTConfig = c.childARTreeConfig

    val io = IO(new Bundle {
        val dataIn = Input(new PMultInput(cPMConfig))
        val dataOut = Output(new IPUOutput(c))
        val bpSel = if (c.bpFirm) Some(Input(Vec(c.width, Bool()))) else None
    })

    // Datapath: inputs -> multipliers -> adder tree -> inner product.
    val pMult = Module(new PMult(cPMConfig))
    pMult.io.in <> io.dataIn

    val aRTree = Module(new ARTree(cARTConfig))
    aRTree.io.in := pMult.io.prod

    io.dataOut.innerProd := aRTree.io.sum

    // bpSel exists exactly when the bypass outputs do (both depend on c.bpFirm).
    for (select <- io.bpSel) {
        io.dataOut.bpWeight.get := PriorityMux(select, io.dataIn.weight)
        io.dataOut.bpActvtn.get := PriorityMux(select, io.dataIn.actvtn)
    }
}

defined [32mclass[39m [36mIPUConfig[39m
defined [32mclass[39m [36mIPUOutput[39m
defined [32mclass[39m [36mIPU[39m

#### Verification

In [14]:
// Four 8-bit lanes with "Firm" bypass.
val exampleIPUCon = new IPUConfig(width = 4, bitWidth = 8, bpType = "Firm")

// Checks the inner product, then checks that selecting lane 1 and lane 2 on the bypass
// forwards that lane's weight and activation.
Driver(() => new IPU(exampleIPUCon)) {
    uut => new PeekPokeTester(uut) {

        // Drive all four bypass-select bits; only `active` (if any) is set.
        def selectBypass(active: Option[Int]): Unit =
            for (lane <- 0 until 4) {
                poke(uut.io.bpSel.get(lane), if (active.contains(lane)) 1 else 0)
            }

        val weightValues = Seq(1, 2, 3, 4)
        val actvtnValues = Seq(5, 6, 7, 8)

        selectBypass(None)

        for ((value, lane) <- weightValues.zipWithIndex) poke(uut.io.dataIn.weight(lane), value)
        for ((value, lane) <- actvtnValues.zipWithIndex) poke(uut.io.dataIn.actvtn(lane), value)

        // 1*5 + 2*6 + 3*7 + 4*8
        expect(uut.io.dataOut.innerProd, 70)

        for (lane <- Seq(1, 2)) {
            selectBypass(Some(lane))
            expect(uut.io.dataOut.bpWeight.get, weightValues(lane))
            expect(uut.io.dataOut.bpActvtn.get, actvtnValues(lane))
        }
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.014] Done elaborating.
Total FIRRTL Compile Time: 35.4 ms
Total FIRRTL Compile Time: 25.0 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532742523910
test cmd12WrapperHelperIPU Success: 5 tests passed in 5 cycles taking 0.007497 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED


[36mexampleIPUCon[39m: [32mIPUConfig[39m = $sess.cmd12Wrapper$Helper$IPUConfig@47bb2ed6
[36mres13_1[39m: [32mBoolean[39m = [32mtrue[39m

## ALU

#### Definition

In [15]:
/** Parameters of the [[ALU]].
  *
  * @param dataWidth width in bits of the data words
  * @param funcs     names of the functions to generate; must contain "Identity" and may
  *                  only contain names listed in `supportedFuncs`
  * @throws IllegalArgumentException if "Identity" is missing or a name is not supported
  *
  * NOTE(review): `numFuncs` is the raw length of `funcs`, so duplicate names would make the
  * function-select vector wider than the number of functions actually generated.
  */
class ALUConfig(val dataWidth: Int, val funcs: List[String]) {
    val identityError = "ALU functions must explicitly include Identity."
    val functionError = "Unsupported Function"
    val supportedFuncs = List("Identity", "Add", "Max", "Accumulate")
    
    require(funcs.contains("Identity"), identityError)
    // Name the offending entry so a typo in the function list is easy to spot.
    funcs.foreach { f => require(supportedFuncs.contains(f), s"$functionError: $f") }
    
    val addSupp = funcs.contains("Add")
    val maxSupp = funcs.contains("Max")
    val accSupp = funcs.contains("Accumulate")
    // Add and Max both operate on the bypassed weight/activation inputs.
    val addBypassIn = addSupp || maxSupp
    val numFuncs = funcs.length
}

/** Inputs of the [[ALU]]. All fields are declared as inputs here, so the bundle is used
  * without a further direction wrapper.
  *
  * The bypass and feedback inputs are optional and only exist when a configured function
  * needs them.
  */
class ALUInput(c: ALUConfig) extends Bundle {
    
    // Lets Chisel build fresh copies of this parameterized bundle.
    override def cloneType = (new ALUInput(c)).asInstanceOf[this.type]
    
    val innerProd = Input(SInt(c.dataWidth.W))
    // One select bit per configured function.
    val funcSel = Input(Vec(c.numFuncs, Bool()))
    
    // Present when Add or Max is configured.
    val weightBp = if(c.addBypassIn) Some(Input(SInt(c.dataWidth.W))) else None
    val actvtnBp = if(c.addBypassIn) Some(Input(SInt(c.dataWidth.W))) else None
    // Present when Accumulate is configured.
    val rfFeedback = if(c.accSupp) Some(Input(SInt(c.dataWidth.W))) else None
}

/** Configurable ALU. Every configured function is computed in parallel and `funcSel` picks
  * which result drives `out` through a priority mux.
  *
  * Functions:
  *  - Identity:   innerProd
  *  - Add:        weightBp + actvtnBp
  *  - Max:        the larger of weightBp and actvtnBp
  *  - Accumulate: innerProd + rfFeedback
  *
  * The results are listed in the fixed order Identity, Add, Max, Accumulate (skipping the
  * ones that are not configured), so `funcSel(i)` refers to position i in that order rather
  * than to the order of names in the configuration.
  */
class ALU(c: ALUConfig) extends Module {

    val io = IO(new Bundle {
        val in = new ALUInput(c)
        val out = Output(SInt(c.dataWidth.W))
    })

    // One result wire per function; optional functions only get a wire when configured.
    val idnOut = Some(Wire(SInt(c.dataWidth.W)))
    val addOut = if(c.addSupp) Some(Wire(SInt(c.dataWidth.W))) else None
    val maxOut = if(c.maxSupp) Some(Wire(SInt(c.dataWidth.W))) else None
    val accOut = if(c.accSupp) Some(Wire(SInt(c.dataWidth.W))) else None

    idnOut.get := io.in.innerProd

    for (sum <- addOut) {
        sum := io.in.weightBp.get + io.in.actvtnBp.get
    }
    for (acc <- accOut) {
        acc := io.in.innerProd + io.in.rfFeedback.get
    }
    for (larger <- maxOut) {
        larger := Mux(io.in.weightBp.get > io.in.actvtnBp.get, io.in.weightBp.get, io.in.actvtnBp.get)
    }

    // Keep only the results that exist, preserving the fixed function order.
    val inters = List(idnOut, addOut, maxOut, accOut).flatten
    io.out := PriorityMux(io.in.funcSel, inters)
}

defined [32mclass[39m [36mALUConfig[39m
defined [32mclass[39m [36mALUInput[39m
defined [32mclass[39m [36mALU[39m

#### Verification

In [16]:
// 8-bit ALU with all four functions.
val exampleALUFuncs = List("Identity", "Add", "Max", "Accumulate")
val exampleALUCon = new ALUConfig(dataWidth = 8, funcs = exampleALUFuncs)

// Holds the operands constant and selects each function in turn.
Driver(() => new ALU(exampleALUCon)) {
    uut => new PeekPokeTester(uut) {

        // Drive funcSel one-hot, with only bit `active` set.
        def selectFunc(active: Int): Unit =
            for (i <- 0 until 4) poke(uut.io.in.funcSel(i), if (i == active) 1 else 0)

        poke(uut.io.in.innerProd, 1)
        poke(uut.io.in.weightBp.get, 2)
        poke(uut.io.in.actvtnBp.get, 3)
        poke(uut.io.in.rfFeedback.get, 4)

        // (select index, expected output):
        //   0 Identity -> 1, 1 Add -> 2 + 3, 2 Max -> max(2, 3), 3 Accumulate -> 1 + 4
        val expectations = Seq(0 -> 1, 1 -> 5, 2 -> 3, 3 -> 5)

        expectations.foreach { case (func, result) =>
            selectFunc(func)
            expect(uut.io.out, result)
        }
    }
}

[[35minfo[0m] [0.003] Elaborating design...
[[35minfo[0m] [0.013] Done elaborating.
Total FIRRTL Compile Time: 19.6 ms
Total FIRRTL Compile Time: 15.5 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532742545006
test cmd14WrapperHelperALU Success: 4 tests passed in 5 cycles taking 0.008006 seconds
[[35minfo[0m] [0.006] RAN 0 CYCLES PASSED


[36mexampleALUFuncs[39m: [32mList[39m[[32mString[39m] = [33mList[39m([32m"Identity"[39m, [32m"Add"[39m, [32m"Max"[39m, [32m"Accumulate"[39m)
[36mexampleALUCon[39m: [32mALUConfig[39m = $sess.cmd14Wrapper$Helper$ALUConfig@6898193d
[36mres15_2[39m: [32mBoolean[39m = [32mtrue[39m

## Nonlinear Unit

In [17]:
/** Parameters of the nonlinear unit ([[NLU]]).
  *
  * @param dataWidth width in bits of the data word
  * @param funcs     names of the functions to generate; must contain "Identity" and may
  *                  only contain names listed in `supportedFuncs`
  * @throws IllegalArgumentException if "Identity" is missing or a name is not supported
  */
class NLUConfig(val dataWidth: Int, val funcs: List[String]) {

    val supportedFuncs = List("Identity", "ReLu")
    val identityError = "NLU functions must explicitly include Identity."
    val functionError = "Unsupported Function"

    require(funcs.contains("Identity"), identityError)
    funcs.foreach { name =>
        require(supportedFuncs.contains(name), functionError)
    }

    val reluSupp = funcs.contains("ReLu")
    // One function-select bit is generated per entry of `funcs`.
    val numFuncs = funcs.length
}

/** Inputs of the nonlinear unit: one signed data word and one select bit per function. */
class NLUInputs(c: NLUConfig) extends Bundle {
    
    // Lets Chisel build fresh copies of this parameterized bundle.
    override def cloneType = (new NLUInputs(c)).asInstanceOf[this.type]
    
    val data = SInt(c.dataWidth.W)
    val fSel = Vec(c.numFuncs, Bool())
}

/** Nonlinear unit. Computes every configured function of the input word in parallel and
  * lets `fSel` pick the result through a priority mux.
  *
  * Functions, in select order (skipping ReLu when it is not configured):
  *  - Identity: the input unchanged
  *  - ReLu:     the input when it is positive, zero otherwise
  */
class NLU(c: NLUConfig) extends Module {

    val io = IO(new Bundle {
        val in  = Input(new NLUInputs(c))
        val out = Output(SInt(c.dataWidth.W))
    })

    val idRes   = Some(Wire(SInt(c.dataWidth.W)))
    val reluRes = if(c.reluSupp) Some(Wire(SInt(c.dataWidth.W))) else None

    idRes.get := io.in.data

    for (rectified <- reluRes) {
        rectified := Mux(io.in.data > 0.S, io.in.data, 0.S)
    }

    // Keep only the results that exist, Identity first.
    val inters = List(idRes, reluRes).flatten
    io.out := PriorityMux(io.in.fSel, inters)
}

defined [32mclass[39m [36mNLUConfig[39m
defined [32mclass[39m [36mNLUInputs[39m
defined [32mclass[39m [36mNLU[39m

In [18]:
// 8-bit nonlinear unit with Identity and ReLu.
val nluFuncs = List("Identity", "ReLu")
val nluCon = new NLUConfig(dataWidth = 8, funcs = nluFuncs)

// Checks both functions for a positive and for a negative input.
Driver(() => new NLU(nluCon)) {
    uut => new PeekPokeTester(uut) {

        // Drive fSel one-hot, with only bit `active` set.
        def selectFunc(active: Int): Unit =
            for (i <- 0 until 2) poke(uut.io.in.fSel(i), if (i == active) 1 else 0)

        // (input, expected Identity result, expected ReLu result)
        val cases = Seq((5, 5, 5), (-4, -4, 0))

        cases.foreach { case (input, identity, relu) =>
            poke(uut.io.in.data, input)

            selectFunc(0)
            expect(uut.io.out, identity)

            selectFunc(1)
            expect(uut.io.out, relu)
        }
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.006] Done elaborating.
Total FIRRTL Compile Time: 10.1 ms
Total FIRRTL Compile Time: 7.5 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532742557080
test cmd16WrapperHelperNLU Success: 4 tests passed in 5 cycles taking 0.003257 seconds
[[35minfo[0m] [0.004] RAN 0 CYCLES PASSED


[36mnluFuncs[39m: [32mList[39m[[32mString[39m] = [33mList[39m([32m"Identity"[39m, [32m"ReLu"[39m)
[36mnluCon[39m: [32mNLUConfig[39m = $sess.cmd16Wrapper$Helper$NLUConfig@904b8ac
[36mres17_2[39m: [32mBoolean[39m = [32mtrue[39m

## Control

### State Machine

#### Definition

In [19]:
/** Parameters of the [[StateMachine]].
  *
  * @param numStates   number of states; the state register is log2Up(numStates) bits wide
  * @param numCtrlSigs sizes the control input, which is log2Up(numCtrlSigs) bits wide
  * @param stateMap    next-state function: given the current state, the control input and
  *                    this configuration, returns the next state
  */
class StateMachineConfig(
        val numStates: Int, 
        val numCtrlSigs: Int, 
        val stateMap: (UInt, UInt, StateMachineConfig) => UInt) {
    
    val stateWidth = log2Up(numStates)
    // NOTE(review): despite the name, numCtrlSigs is treated as a count of distinct control
    // values (the port width is its log2), not as a number of control wires -- confirm.
    val ctrlWidth = log2Up(numCtrlSigs)
}

/** Generic state machine: a state register, reset to 0, that is updated every cycle with
  * `c.stateMap(currentState, control, c)`. The current state is exposed on `io.out`.
  */
class StateMachine(c: StateMachineConfig) extends Module {
    
    // Same value as c.stateWidth; not referenced again within this class.
    val stateWidth: Int = log2Up(c.numStates)
    
    val io = IO(new Bundle {
        val control = Input (UInt(c.ctrlWidth.W ))
        val out     = Output(UInt(c.stateWidth.W))
    })
    
    val register = RegInit(0.U(c.stateWidth.W))
    // Next state is a function of the current state and the control input.
    register := c.stateMap(register, io.control, c)
    io.out := register
}

defined [32mclass[39m [36mStateMachineConfig[39m
defined [32mclass[39m [36mStateMachine[39m

#### Example

In [20]:
/** Example next-state function for a two-state machine: from either state, control 1 leads
  * to state 1 and control 0 leads to state 0. Any other combination leads to state 0.
  *
  * @param state   current state
  * @param control control input
  * @param c       state machine configuration (supplies the state width)
  * @return a wire carrying the next state
  */
def exampleStateMap(state: UInt, control: UInt, c: StateMachineConfig): UInt = {

    val nextState = Wire(UInt(c.stateWidth.W))

    // Transition table; the first matching row wins.
    val transitions = Seq(
        (state === 0.U & control === 0.U) -> 0.U,
        (state === 0.U & control === 1.U) -> 1.U,
        (state === 1.U & control === 0.U) -> 0.U,
        (state === 1.U & control === 1.U) -> 1.U
    )

    // Default to state 0 when no row matches.
    nextState := MuxCase(0.U, transitions)

    nextState
}

defined [32mfunction[39m [36mexampleStateMap[39m

#### Verification

In [21]:
// Two states, two control values, using the example transition function.
val exampleStateMachineConfig = new StateMachineConfig(2, 2, exampleStateMap)

// Checks the reset state, then walks the four transitions 0->1, 1->1, 1->0, 0->0.
Driver(() => new StateMachine(exampleStateMachineConfig)) {
    uut => new PeekPokeTester(uut) {
        // Out of reset the machine is in state 0.
        poke(uut.io.control, 0)
        expect(uut.io.out, 0)

        // (control applied for one cycle, state expected afterwards)
        val walk = Seq((1, 1), (1, 1), (0, 0), (0, 0))

        walk.foreach { case (control, expectedState) =>
            poke(uut.io.control, control)
            step(1)
            expect(uut.io.out, expectedState)
        }
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.008] Done elaborating.
Total FIRRTL Compile Time: 14.2 ms
Total FIRRTL Compile Time: 12.3 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532742566012
test cmd18WrapperHelperStateMachine Success: 5 tests passed in 9 cycles taking 0.003020 seconds
[[35minfo[0m] [0.004] RAN 4 CYCLES PASSED


[36mexampleStateMachineConfig[39m: [32mStateMachineConfig[39m = $sess.cmd18Wrapper$Helper$StateMachineConfig@d559fe0
[36mres20_1[39m: [32mBoolean[39m = [32mtrue[39m

### Decoder

#### Definition

In [42]:
// Configuration for a Decoder.
//
// Carries one configuration object per unit the decoder produces control for
// (three parallel register files, IPU, ALU, NLU), the state machine
// configuration (the Decoder reads `smConfig.stateWidth` to size its state
// input), and one decode function per unit. Each decode function is called by
// the Decoder with the current state and the matching unit configuration, and
// returns the control signals that the Decoder connects to its outputs.
class DecoderConfig(
        val weightPRFConfig: PRFConfig,
        val actvtnPRFConfig: PRFConfig,
        val intrnlPRFConfig: PRFConfig,
        val ipuConfig: IPUConfig,
        val aluConfig: ALUConfig,
        val nluConfig: NLUConfig,
        val smConfig: StateMachineConfig,
        // (state, unit configuration) => control signals for that unit
        val decodeWeightPRF: (UInt, PRFConfig) => Data,
        val decodeActvtnPRF: (UInt, PRFConfig) => Data,
        val decodeIntrnlPRF: (UInt, PRFConfig) => Data,
        val decodeIPU: (UInt, IPUConfig) => Data,
        val decodeALU: (UInt, ALUConfig) => Data,
        val decodeNLU: (UInt, NLUConfig) => Data)

// Output bundle grouping the control signals for the three parallel register
// files; each PRFControl is shaped by the corresponding PRFConfig held in the
// DecoderConfig.
class MemoryControl(c: DecoderConfig) extends Bundle {
    
    // Rebuilds the bundle from the same configuration when it is cloned.
    override def cloneType = (new MemoryControl(c)).asInstanceOf[this.type]
    
    val weightPRF = Output(new PRFControl(c.weightPRFConfig))
    val actvtnPRF = Output(new PRFControl(c.actvtnPRFConfig))
    val intrnlPRF = Output(new PRFControl(c.intrnlPRFConfig))
}

// Output bundle grouping the control signals for the processing units:
// one select bit per ALU function, one select bit per NLU function and,
// only when the IPU configuration has `bpFirm` set, one IPU bypass-select
// bit per weight-PRF port.
class ProcessControl(c: DecoderConfig) extends Bundle {
    
    // Rebuilds the bundle from the same configuration when it is cloned.
    override def cloneType = (new ProcessControl(c)).asInstanceOf[this.type]
    
    val aluFSel = Output(Vec(c.aluConfig.numFuncs, Bool()))
    val nluFSel = Output(Vec(c.nluConfig.numFuncs, Bool()))
    
    // Plain Scala values (not hardware fields) used to shape ipuBpSel below.
    val ports = c.weightPRFConfig.ports
    val bpFirm = c.ipuConfig.bpFirm
    
    // Optional field: present only when bpFirm is true.
    val ipuBpSel = if (bpFirm) Some(Output(Vec(ports, Bool()))) else None
}

// Translates the current state into control signals by delegating to the
// decode functions supplied in the DecoderConfig: three register-file control
// bundles on `io.mem`, and the IPU/ALU/NLU selects on `io.proc`.
class Decoder(c: DecoderConfig) extends Module {

    val io = IO(new Bundle {
        val state = Input(UInt(c.smConfig.stateWidth.W))
        val mem   = Output(new MemoryControl(c))
        val proc  = Output(new ProcessControl(c))
    })

    // Register-file control: pair every output port with the result of its
    // decode function, then connect each pair.
    Seq(
        io.mem.weightPRF -> c.decodeWeightPRF(io.state, c.weightPRFConfig),
        io.mem.actvtnPRF -> c.decodeActvtnPRF(io.state, c.actvtnPRFConfig),
        io.mem.intrnlPRF -> c.decodeIntrnlPRF(io.state, c.intrnlPRFConfig)
    ).foreach { case (port, decoded) => port <> decoded }

    // ipuBpSel only exists when the IPU configuration sets bpFirm, so the IPU
    // decode function is invoked only in that case.
    io.proc.ipuBpSel.foreach { sel => sel := c.decodeIPU(io.state, c.ipuConfig) }

    // Function selects for the ALU and the NLU.
    io.proc.aluFSel := c.decodeALU(io.state, c.aluConfig)
    io.proc.nluFSel := c.decodeNLU(io.state, c.nluConfig)
}

defined [32mclass[39m [36mDecoderConfig[39m
defined [32mclass[39m [36mMemoryControl[39m
defined [32mclass[39m [36mProcessControl[39m
defined [32mclass[39m [36mDecoder[39m

#### Example

In [26]:
// Example decode function for the weight PRF. Every register file in the PRF
// receives the same control word:
//   state 0      -> enables on,  wAddr 1, rAddrInt 2, rAddrExt 3, bpSel on
//   other states -> enables off, wAddr 4, rAddrInt 5, rAddrExt 6, bpSel off
// bpSel is driven only when the register file has that field.
def exampleDecodeWeightPRF(state: UInt, c: PRFConfig) = {

    val ctrl = Wire(new PRFControl(c))

    // Drives the same control word onto every register file of the PRF.
    def driveAll(enable: Boolean, wAddr: Int, rAddrInt: Int, rAddrExt: Int): Unit =
        for (rf <- ctrl.rf) {
            rf.wEnable  := enable.B
            rf.rEnable  := enable.B
            rf.wAddr    := wAddr.U
            rf.rAddrInt := rAddrInt.U
            rf.rAddrExt := rAddrExt.U
            rf.bpSel.foreach { sel => sel := enable.B }
        }

    when (state === 0.U) {
        driveAll(true, 1, 2, 3)
    } .otherwise {
        driveAll(false, 4, 5, 6)
    }

    ctrl
}

// Example decode function for the activation PRF. Every register file in the
// PRF receives the same control word:
//   state 0      -> enables on,  wAddr 1, rAddrInt 2, rAddrExt 3, bpSel on
//   other states -> enables off, wAddr 4, rAddrInt 5, rAddrExt 6, bpSel off
// bpSel is driven only when the register file has that field.
def exampleDecodeActvtnPRF(state: UInt, c: PRFConfig) = {

    val ctrl = Wire(new PRFControl(c))

    // Drives the same control word onto every register file of the PRF.
    def driveAll(enable: Boolean, wAddr: Int, rAddrInt: Int, rAddrExt: Int): Unit =
        for (rf <- ctrl.rf) {
            rf.wEnable  := enable.B
            rf.rEnable  := enable.B
            rf.wAddr    := wAddr.U
            rf.rAddrInt := rAddrInt.U
            rf.rAddrExt := rAddrExt.U
            rf.bpSel.foreach { sel => sel := enable.B }
        }

    when (state === 0.U) {
        driveAll(true, 1, 2, 3)
    } .otherwise {
        driveAll(false, 4, 5, 6)
    }

    ctrl
}

// Example decode function for the internal PRF. Every register file in the
// PRF receives the same control word:
//   state 0      -> enables on,  wAddr 1, rAddrInt 2, rAddrExt 3, bpSel on,
//                   inSel one-hot on input 0
//   other states -> enables off, wAddr 4, rAddrInt 5, rAddrExt 6, bpSel off,
//                   inSel one-hot on input 1
// bpSel and inSel are driven only when the register file has those fields.
//
// inSel is driven element by element, so the function works for any number of
// inputs; for two inputs the result is exactly (true, false) / (false, true).
def exampleDecodeIntrnlPRF(state: UInt, c: PRFConfig) = {

    val data = Wire(new PRFControl(c))

    // Drives the same control word onto every register file of the PRF.
    // `hotInput` is the index of the single inSel bit that is set.
    def driveAll(enable: Boolean, wAddr: Int, rAddrInt: Int, rAddrExt: Int, hotInput: Int): Unit =
        data.rf.foreach { k =>
            k.wEnable   := enable.B
            k.rEnable   := enable.B
            k.wAddr     := wAddr.U
            k.rAddrInt  := rAddrInt.U
            k.rAddrExt  := rAddrExt.U
            k.bpSel.foreach { sel => sel := enable.B }
            k.inSel.foreach { sel =>
                sel.zipWithIndex.foreach { case (bit, i) => bit := (i == hotInput).B }
            }
        }

    when (state === 0.U) {
        driveAll(true, 1, 2, 3, hotInput = 0)
    } .otherwise {
        driveAll(false, 4, 5, 6, hotInput = 1)
    }

    data
}

// Example decode function for the IPU bypass select: a one-hot vector of
// `c.width` bits with bit 0 set in state 0 and bit 1 set in every other state.
//
// The bits are driven one by one as Bool values, so the pattern matches the
// wire for any `c.width`; for a width of 2 it is (1, 0) in state 0 and (0, 1)
// otherwise.
def exampleDecodeIPU(state: UInt, c: IPUConfig) = {

    val data = Wire(Vec(c.width, Bool()))

    // Sets bit `hot` and clears every other bit of `data`.
    def driveOneHot(hot: Int): Unit =
        data.zipWithIndex.foreach { case (bit, i) => bit := (i == hot).B }

    when (state === 0.U) {
        driveOneHot(0)
    } .otherwise {
        driveOneHot(1)
    }

    data
}

// Example decode function for the ALU function select: a one-hot vector of
// `c.numFuncs` bits with bit 0 set in state 0 and bit 1 set in every other
// state.
//
// The bits are driven one by one as Bool values, so the pattern matches the
// wire for any `c.numFuncs`; for four functions it is (1, 0, 0, 0) in state 0
// and (0, 1, 0, 0) otherwise.
def exampleDecodeALU(state: UInt, c: ALUConfig) = {

    val data = Wire(Vec(c.numFuncs, Bool()))

    // Sets bit `hot` and clears every other bit of `data`.
    def driveOneHot(hot: Int): Unit =
        data.zipWithIndex.foreach { case (bit, i) => bit := (i == hot).B }

    when (state === 0.U) {
        driveOneHot(0)
    } .otherwise {
        driveOneHot(1)
    }

    data
}

// Example decode function for the NLU function select: a one-hot vector of
// `c.numFuncs` bits with bit 0 set in state 0 and bit 1 set in every other
// state.
//
// The bits are driven one by one as Bool values, so the pattern matches the
// wire for any `c.numFuncs`; for two functions it is (1, 0) in state 0 and
// (0, 1) otherwise.
def exampleDecodeNLU(state: UInt, c: NLUConfig) = {

    val data = Wire(Vec(c.numFuncs, Bool()))

    // Sets bit `hot` and clears every other bit of `data`.
    def driveOneHot(hot: Int): Unit =
        data.zipWithIndex.foreach { case (bit, i) => bit := (i == hot).B }

    when (state === 0.U) {
        driveOneHot(0)
    } .otherwise {
        driveOneHot(1)
    }

    data
}


defined [32mfunction[39m [36mexampleDecodeWeightPRF[39m
defined [32mfunction[39m [36mexampleDecodeActvtnPRF[39m
defined [32mfunction[39m [36mexampleDecodeIntrnlPRF[39m
defined [32mfunction[39m [36mexampleDecodeIPU[39m
defined [32mfunction[39m [36mexampleDecodeALU[39m
defined [32mfunction[39m [36mexampleDecodeNLU[39m

#### Verification

In [61]:
// TODO: require IPU width == weightPRF width == actvtnPRF width
// TODO: require IPUConfig "Firm" if ALUConfig "Add" or "Max"

val exampleDecoderConfig = new DecoderConfig(
    new PRFConfig(2, 8, 4, 1, "Soft"),
    new PRFConfig(2, 8, 4, 1, "Soft"),
    new PRFConfig(1, 8, 4, 2, "Soft"),
    new IPUConfig(2, 8, "Firm"),
    new ALUConfig(8, List("Identity", "Add", "Max", "Accumulate")),
    new NLUConfig(8, List("Identity", "ReLu")),
    new StateMachineConfig(4, 4, exampleStateMap),
    exampleDecodeWeightPRF,
    exampleDecodeActvtnPRF,
    exampleDecodeIntrnlPRF,
    exampleDecodeIPU,
    exampleDecodeALU,
    exampleDecodeNLU
)


Driver(() => new Decoder(exampleDecoderConfig)) {

    uut => new PeekPokeTester(uut) {

        // Checks register file 0 of a PRF control bundle against the control
        // word the example decode functions emit: enables and bypass select
        // high with addresses 1/2/3 when `active`, low with 4/5/6 otherwise.
        def expectFirstRF(prf: PRFControl, active: Boolean): Unit = {
            val rf = prf.rf(0)
            val level = if (active) 1 else 0
            val (wAddr, rAddrInt, rAddrExt) = if (active) (1, 2, 3) else (4, 5, 6)

            expect(rf.wEnable, level)
            expect(rf.rEnable, level)
            expect(rf.wAddr, wAddr)
            expect(rf.rAddrInt, rAddrInt)
            expect(rf.rAddrExt, rAddrExt)
            expect(rf.bpSel.get, level)
        }

        // Checks bits 0 until `width` of a select vector: only bit `hot` is set.
        def expectOneHot(sel: Vec[Bool], width: Int, hot: Int): Unit =
            for (i <- 0 until width) expect(sel(i), if (i == hot) 1 else 0)

        // Applies `state`, advances one cycle and checks every decoder output.
        // State 0 is the "active" control word with select bit 0 set; any
        // other state is the "inactive" control word with select bit 1 set.
        def checkState(state: Int): Unit = {
            val active = state == 0
            val hot = if (active) 0 else 1

            poke(uut.io.state, state)
            step(1)

            expectFirstRF(uut.io.mem.weightPRF, active)
            expectFirstRF(uut.io.mem.actvtnPRF, active)

            expectOneHot(uut.io.proc.ipuBpSel.get, 2, hot)
            expectOneHot(uut.io.proc.aluFSel, 4, hot)

            expectFirstRF(uut.io.mem.intrnlPRF, active)

            expectOneHot(uut.io.proc.nluFSel, 2, hot)
        }

        checkState(0)
        checkState(1)
    }
}


[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.007] Done elaborating.
Total FIRRTL Compile Time: 21.3 ms
Total FIRRTL Compile Time: 19.1 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532744283984
test cmd41WrapperHelperDecoder Success: 52 tests passed in 7 cycles taking 0.014456 seconds
[[35minfo[0m] [0.011] RAN 2 CYCLES PASSED


[36mexampleDecoderConfig[39m: [32mDecoderConfig[39m = $sess.cmd41Wrapper$Helper$DecoderConfig@1c257d9f
[36mres60_1[39m: [32mBoolean[39m = [32mtrue[39m

## PE

#### Definition

In [None]:
// Groups the configuration objects of the units that make up a processing
// engine: state machine, decoder, parallel register file, IPU, ALU and NLU.
// NOTE(review): the nPE class in this cell does not take a PEConfig yet; it
// still uses individual constructor arguments — confirm the intended use.
class PEConfig(
        val stateMachineConfig: StateMachineConfig,
        val decoderConfig: DecoderConfig,
        val prfConfig: PRFConfig,
        val ipuConfig: IPUConfig,
        val aluConfig: ALUConfig,
        val nluConfig: NLUConfig)

// Top level of the processing engine. Wires together, in data-flow order:
//
//   state machine -> decoder -> { weight RF, activation RF } -> IPU -> ALU
//                 -> internal RF -> NLU -> io.output
//
// The state machine is driven by `io.extrnl_ctrl`; the decoder turns its state
// into the control signals of every other unit. The weight and activation
// register files are written from `io.weightRF_in` / `io.actvtnRF_in` and
// expose their external read ports on `io.weightRF_2NoC` / `io.actvtnRF_2NoC`.
// The internal register file is written either from the ALU result or from
// `io.intrnlRF_in` (chosen by the decoder) and feeds both the NLU and, when
// the ALU has a feedback port, the ALU.
//
// NOTE(review): this class still uses positional constructor arguments and
// flat IO names (write_en, raddr_int, ...), while StateMachine and Decoder are
// constructed elsewhere in this file from a single config object and
// StateMachine drives `io.out`. The sub-module constructor calls and IO names
// below presumably need migrating to that interface — confirm before use.
class nPE(stateMap: Map[(UInt, UInt), UInt], extrnl_ctrl_width: Int, // State Machine
          decode: (UInt, String) => Data, RFports: Int, weightRFBP: String, actvtnRFBP: String, datawidth: Int, addrwidth: Int,
          aluFuncs: List[String], nluFuncs: List[String], intrnlRFBP: String
         ) extends Module {
    
    val io = IO(new Bundle {
        val extrnl_ctrl   = Input (SInt(extrnl_ctrl_width.W))
        val weightRF_in   = Input (Vec(RFports, SInt(datawidth.W)))
        val actvtnRF_in   = Input (Vec(RFports, SInt(datawidth.W)))
        val intrnlRF_in   = Input (SInt(datawidth.W))
        val weightRF_2NoC = Output(Vec(RFports, SInt(datawidth.W)))
        val actvtnRF_2NoC = Output(Vec(RFports, SInt(datawidth.W)))
        val intrnlRF_2NoC = Output(SInt(datawidth.W))
        val output        = Output(SInt(datawidth.W))
    })
    
    // Every sub-module is wrapped in Module(...): Chisel rejects a module that
    // is instantiated with a bare `new` inside another module.
    
    // State Machine
    val stateMachine = Module(new StateMachine(stateMap, extrnl_ctrl_width))
    stateMachine.io.control := io.extrnl_ctrl
    
    // Decoder
    val decoder = Module(new Decoder(decode, log2Up(stateMap.size), 
                                     RFports, datawidth, addrwidth, aluFuncs, nluFuncs))
    decoder.io.state := stateMachine.io.state
    
    
    // Weight RF
    val weightRF = Module(new pRF(RFports, weightRFBP, datawidth, addrwidth))
    
    // Mandatory Control
    // TODO: the original listed io.in.wEnable / rEnable / wAddr / rAddrInt /
    // rAddrExt here as bare expressions (no effect); presumably a reminder to
    // move to that naming — confirm.
    weightRF.io.write_en    := decoder.io.weightRF_wen
    weightRF.io.read_en     := decoder.io.weightRF_ren
    weightRF.io.waddr       := decoder.io.weightRF_waddr
    weightRF.io.raddr_int   := decoder.io.weightRF_raddr_int
    weightRF.io.raddr_ext   := decoder.io.weightRF_raddr_ext
    
    // Optional Control
    if ( weightRF.io.bp_slct.isDefined ) { weightRF.io.bp_slct.get := decoder.io.weightRF_bp_slct_get }
    
    // Write data
    weightRF.io.wdata := io.weightRF_in
    
    // External read port
    io.weightRF_2NoC  := weightRF.io.rdata_ext
    
    // Activation RF
    val actvtnRF = Module(new pRF(RFports, actvtnRFBP, datawidth, addrwidth))
    
    // Mandatory Control
    actvtnRF.io.write_en    := decoder.io.actvtnRF_wen
    actvtnRF.io.read_en     := decoder.io.actvtnRF_ren
    actvtnRF.io.waddr       := decoder.io.actvtnRF_waddr
    actvtnRF.io.raddr_int   := decoder.io.actvtnRF_raddr_int
    actvtnRF.io.raddr_ext   := decoder.io.actvtnRF_raddr_ext
    
    // Optional Control
    if ( actvtnRF.io.bp_slct.isDefined ) { actvtnRF.io.bp_slct.get := decoder.io.actvtnRF_bp_slct_get }
    
    // Write data: the activation RF is fed from the activation input (it was
    // previously wired to the weight input, leaving io.actvtnRF_in unused).
    actvtnRF.io.wdata := io.actvtnRF_in
    
    // External read port
    io.actvtnRF_2NoC     := actvtnRF.io.rdata_ext
    
    // IPU: bypass paths are only built when the ALU has a function ("Add" or
    // "Max") that consumes the bypassed operands.
    val ipuBP = if(aluFuncs.contains("Add") || aluFuncs.contains("Max")) "Firm" else "None" 
    val ipu   = Module(new IPU(RFports, ipuBP, datawidth))
    if (ipu.io.sel.isDefined) { ipu.io.sel.get := decoder.io.ipu_sel_get }
    ipu.io.in1 := weightRF.io.rdata_int
    ipu.io.in2 := actvtnRF.io.rdata_int
    
    // ALU
    val alu = Module(new ALU(aluFuncs, datawidth))
    alu.io.func_slct := decoder.io.alu_func_slct
    alu.io.innr_prod := ipu.io.out
    if(alu.io.weight_bp.isDefined) alu.io.weight_bp.get := ipu.io.bp1.get
    if(alu.io.actvtn_bp.isDefined) alu.io.actvtn_bp.get := ipu.io.bp2.get
    
    // Internal RF: written from the ALU result or from the external input,
    // as selected by the decoder.
    val intrnlRF = Module(new pRF(1, intrnlRFBP, datawidth, addrwidth))
    intrnlRF.io.write_en  := decoder.io.intrnlRF_write_en
    intrnlRF.io.read_en   := decoder.io.intrnlRF_read_en
    intrnlRF.io.waddr     := decoder.io.intrnlRF_waddr
    intrnlRF.io.raddr_int := decoder.io.intrnlRF_raddr_int
    intrnlRF.io.raddr_ext := decoder.io.intrnlRF_raddr_ext
    if (intrnlRF.io.bp_slct.isDefined) { intrnlRF.io.bp_slct.get := decoder.io.intrnlRF_bp_slct_get }
    intrnlRF.io.wdata := Mux(decoder.io.intrnlRF_wdata_slct, alu.io.output, io.intrnlRF_in)
    io.intrnlRF_2NoC := intrnlRF.io.rdata_ext
    if(alu.io.rf_feedbk.isDefined) alu.io.rf_feedbk.get := intrnlRF.io.rdata_int
    
    // NLU: produces the engine's output from the internal RF read port.
    val nlu = Module(new NonlinearUnit(nluFuncs, datawidth))
    nlu.io.fslct := decoder.io.nlu_func_slct
    nlu.io.input     := intrnlRF.io.rdata_int
    io.output        := nlu.io.outpt
}

#### Verification

## Future Plans
* Verify everything using Golden Models