# nPE: A Configurable Processing Engine
#### Verification | Version 0.5.1 | Updated 2018.7.26
___

## Setup

In [1]:
val path = System.getProperty("user.dir") + "/source/load-ivy.sc"
interp.load.module(ammonite.ops.Path(java.nio.file.FileSystems.getDefault().getPath(path)))

[36mpath[39m: [32mString[39m = [32m"""
C:\Users\RyanL\OneDrive\Research\SEAL\processing-engine/source/load-ivy.sc
"""[39m

In [2]:
import chisel3._
import chisel3.util._
import chisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

import scala.math.pow

[32mimport [39m[36mchisel3._
[39m
[32mimport [39m[36mchisel3.util._
[39m
[32mimport [39m[36mchisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

[39m
[32mimport [39m[36mscala.math.pow[39m

## Parallel Register File

### Single Register File

#### Definition

In [84]:
class RFControl(addrWidth: Int, bpSupp: Boolean) extends Bundle {
    
    override def cloneType = (new RFControl(addrWidth, bpSupp)).asInstanceOf[this.type]
    
    val wEnable  = Bool()
    val rEnable  = Bool()
    val wAddr    = UInt(addrWidth.W)
    val rAddrInt = UInt(addrWidth.W)
    val rAddrExt = UInt(addrWidth.W)
    val bpSel    = if (bpSupp) Some(Bool()) else None
}

class RFOutputs(dataWidth: Int) extends Bundle {
    
    override def cloneType = (new RFOutputs(dataWidth)).asInstanceOf[this.type]
    
    val int = SInt(dataWidth.W)
    val ext = SInt(dataWidth.W)
}

class RF(dataWidth: Int, addrWidth: Int, bpSupp: Boolean) extends Module {
  
    val io = IO(new Bundle {
        val control = Input(new RFControl(addrWidth, bpSupp))
        val dataIn  = Input(SInt(dataWidth.W))
        val dataOut = Output(new RFOutputs(dataWidth))
    })
    
    val registers = RegInit(Vec(Seq.fill(pow(2, addrWidth).toInt) { 0.S(addrWidth.W) }))
    
    when (io.control.wEnable) {
        registers(io.control.wAddr) := io.dataIn
    }
    
    when (io.control.rEnable) {
        when(io.control.bpSel.getOrElse(false.B)) {
            io.dataOut.int := io.dataIn
            io.dataOut.ext := io.dataIn
        } .otherwise {
            io.dataOut.int := registers(io.control.rAddrInt)
            io.dataOut.ext := registers(io.control.rAddrExt)
        }
    } .otherwise {
        io.dataOut.int := 0.S
        io.dataOut.ext := 0.S
    }
}

defined [32mclass[39m [36mRFControl[39m
defined [32mclass[39m [36mRFOutputs[39m
defined [32mclass[39m [36mRF[39m

#### Verification

In [87]:
Driver(() => new RF(8, 4, true)) {
    uut => new PeekPokeTester(uut) {
         
        poke(uut.io.control.bpSel.get, false)
        poke(uut.io.control.wEnable, true)   
        poke(uut.io.control.rEnable, true)

        poke(uut.io.control.wAddr, 1)
        poke(uut.io.dataIn, 1)
        
        step(1)
        
        poke(uut.io.control.rAddrInt, 1)
        expect(uut.io.dataOut.int, 1)
        
        poke(uut.io.control.rAddrExt, 1)
        expect(uut.io.dataOut.ext, 1)
        
        poke(uut.io.control.wAddr, 2)
        poke(uut.io.dataIn, 2)
        
        step(1)
        
        poke(uut.io.control.rAddrInt, 1)
        expect(uut.io.dataOut.int, 1)
        
        poke(uut.io.control.rAddrExt, 2)
        expect(uut.io.dataOut.ext, 2)
        
        poke(uut.io.control.wAddr, 3)
        poke(uut.io.dataIn, 3)
        
        step(1)
        
        poke(uut.io.control.rAddrInt, 1)
        expect(uut.io.dataOut.int, 1)
        
        poke(uut.io.control.rAddrExt, 2)
        expect(uut.io.dataOut.ext, 2)
        
        poke(uut.io.control.rAddrInt, 3)
        expect(uut.io.dataOut.int, 3)
        
        step(1)
        
        poke(uut.io.control.bpSel.get, true)
        poke(uut.io.dataIn, 10)
        expect(uut.io.dataOut.int, 10)
        expect(uut.io.dataOut.ext, 10)
        
        poke(uut.io.control.bpSel.get, false)
        expect(uut.io.dataOut.int, 3)
        expect(uut.io.dataOut.ext, 2)
        
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.003] Done elaborating.
Total FIRRTL Compile Time: 35.5 ms
Total FIRRTL Compile Time: 30.8 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532642051802
test cmd83WrapperHelperRF Success: 11 tests passed in 9 cycles taking 0.016142 seconds
[[35minfo[0m] [0.013] RAN 4 CYCLES PASSED


[36mres86[39m: [32mBoolean[39m = [32mtrue[39m

### Putting them Together

#### Definition

In [90]:
class PRFControl(ports: Int, addrWidth: Int, bpSoft: Boolean) extends Bundle {
    
    override def cloneType = (new PRFControl(ports, addrWidth, bpSoft)).asInstanceOf[this.type]
    
    val rf = Vec(ports, new RFControl(addrWidth, bpSoft))
}

class PRF(ports: Int, dataWidth: Int, addrWidth: Int, bp: String) extends Module {
    
    require(List("None", "Soft", "Hard") contains bp)
    
    val bpNone = (bp == "None")
    val bpSoft = (bp == "Soft")
    val bpHard = (bp == "Hard")
    
    val io = IO(new Bundle {
        val control = Input(new PRFControl(ports, addrWidth, bpSoft))
        val dataIn = Input(Vec(ports, SInt(dataWidth.W)))
        val dataOut = Output(Vec(ports, new RFOutputs(dataWidth)))
    })
    
    if (bpNone || bpSoft) {
        
        val rf = Seq.fill(ports){ Module(new RF(dataWidth, addrWidth, bpSoft)) }
        
        rf.zipWithIndex.map {
            case (x: RF, i: Int) => { 
                x.io.control <> io.control.rf(i)
                x.io.dataIn := io.dataIn(i)
                
                io.dataOut(i) <> x.io.dataOut
            } 
        }
        
    } else if (bpHard) {
        for (i <- 0 until ports) {
            io.dataOut(i).int := io.dataIn(i)
            io.dataOut(i).ext := io.dataIn(i)
        }
    }
}

defined [32mclass[39m [36mPRFControl[39m
defined [32mclass[39m [36mPRF[39m

#### Verification

In [91]:
Driver(() => new PRF(2, 8, 4, "Soft")) {
    uut => new PeekPokeTester(uut) {
         
        poke(uut.io.control.rf(0).wEnable, true)  
        poke(uut.io.control.rf(1).wEnable, true) 
        poke(uut.io.control.rf(0).rEnable, true)
        poke(uut.io.control.rf(1).rEnable, true)
        poke(uut.io.control.rf(0).bpSel.get, false)
        poke(uut.io.control.rf(1).bpSel.get, false)

        poke(uut.io.control.rf(0).wAddr, 1)
        poke(uut.io.control.rf(1).wAddr, 1)
        poke(uut.io.dataIn(0), 1)
        poke(uut.io.dataIn(1), 1)
        
        step(1)
        
        // Read
        poke(uut.io.control.rf(0).rAddrInt, 1)
        poke(uut.io.control.rf(1).rAddrInt, 1)
        expect(uut.io.dataOut(0).int, 1)
        expect(uut.io.dataOut(1).int, 1)
        
        poke(uut.io.control.rf(0).rAddrExt, 1)
        poke(uut.io.control.rf(1).rAddrExt, 1)
        expect(uut.io.dataOut(0).ext, 1)
        expect(uut.io.dataOut(1).ext, 1)
        
        // Write
        poke(uut.io.control.rf(0).wAddr, 2)
        poke(uut.io.control.rf(1).wAddr, 2)
        poke(uut.io.dataIn(0), 2)
        poke(uut.io.dataIn(1), 2)
        
        step(1)
        
        // Read
        poke(uut.io.control.rf(0).rAddrInt, 1)
        poke(uut.io.control.rf(1).rAddrInt, 1)
        expect(uut.io.dataOut(0).int, 1)
        expect(uut.io.dataOut(1).int, 1)
        
        poke(uut.io.control.rf(0).rAddrExt, 2)
        poke(uut.io.control.rf(1).rAddrExt, 2)
        expect(uut.io.dataOut(0).ext, 2)
        expect(uut.io.dataOut(1).ext, 2)
        
        // Write
        poke(uut.io.control.rf(0).wAddr, 3)
        poke(uut.io.control.rf(1).wAddr, 3)
        poke(uut.io.dataIn(0), 3)
        poke(uut.io.dataIn(1), 3)
        
        step(1)
        
        // Read
        poke(uut.io.control.rf(0).rAddrInt, 1)
        poke(uut.io.control.rf(1).rAddrInt, 1)
        expect(uut.io.dataOut(0).int, 1)
        expect(uut.io.dataOut(1).int, 1)
        
        poke(uut.io.control.rf(0).rAddrExt, 2)
        poke(uut.io.control.rf(1).rAddrExt, 2)
        expect(uut.io.dataOut(0).ext, 2)
        expect(uut.io.dataOut(1).ext, 2)
        
        poke(uut.io.control.rf(0).rAddrInt, 3)
        poke(uut.io.control.rf(1).rAddrInt, 3)
        expect(uut.io.dataOut(0).int, 3)
        expect(uut.io.dataOut(1).int, 3)
        
        // Bypass
        poke(uut.io.control.rf(0).bpSel.get, true)
        poke(uut.io.control.rf(1).bpSel.get, false)
        poke(uut.io.dataIn(0), 10)
        poke(uut.io.dataIn(1), 10)
        expect(uut.io.dataOut(0).int, 10)
        expect(uut.io.dataOut(1).int, 3)
        expect(uut.io.dataOut(0).ext, 10)
        expect(uut.io.dataOut(1).ext, 2)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.019] Done elaborating.
Total FIRRTL Compile Time: 100.5 ms
Total FIRRTL Compile Time: 53.0 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532642544269
test cmd89WrapperHelperPRF Success: 18 tests passed in 8 cycles taking 0.039429 seconds
[[35minfo[0m] [0.034] RAN 3 CYCLES PASSED


[36mres90[39m: [32mBoolean[39m = [32mtrue[39m

## Inner Product Unit

### Parallel Multiplier

#### Definition

In [23]:
def checkParamsPMult(numPairs: Int, bitWidth: Int) {
    require(numPairs >= 1, "Must have at least one pair of multiplicands.")
    require(bitWidth >= 1, "Bitwidth must be at least one.")
}

class PMultInputs(numPairs: Int, bitWidth: Int) extends Bundle {
    val left  = Input(Vec(numPairs, SInt(bitWidth.W)))
    val right = Input(Vec(numPairs, SInt(bitWidth.W)))
}

class PMult(numPairs: Int, bitWidth: Int) extends Module {
    
    val io = IO(new Bundle {
        val in   = new PMultInputs(numPairs, bitWidth)
        val prod = Output(Vec(numPairs, SInt(bitWidth.W)))
    })
    
    io.prod := (io.in.left zip io.in.right).map { case(a, b) => a * b }
}

defined [32mfunction[39m [36mcheckParamsPMult[39m
defined [32mclass[39m [36mPMultInputs[39m
defined [32mclass[39m [36mPMult[39m

#### Verification

In [24]:
Driver(() => new PMult(4, 8)) {
    uut => new PeekPokeTester(uut) {
        poke(uut.io.in.left(0), 1) 
        poke(uut.io.in.right(0), 2)
        
        poke(uut.io.in.left(1), 3) 
        poke(uut.io.in.right(1), 4)
        
        poke(uut.io.in.left(2), 5)
        poke(uut.io.in.right(2), 6)
        
        poke(uut.io.in.left(3), 7)
        poke(uut.io.in.right(3), 8)
        
        expect(uut.io.prod(0), 2)
        expect(uut.io.prod(1), 12)
        expect(uut.io.prod(2), 30)
        expect(uut.io.prod(3), 56)
  }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.007] Done elaborating.
Total FIRRTL Compile Time: 11.5 ms
Total FIRRTL Compile Time: 8.4 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532630375683
test cmd22WrapperHelperPMult Success: 4 tests passed in 5 cycles taking 0.003763 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED


[36mres23[39m: [32mBoolean[39m = [32mtrue[39m

### Additive Reduction Tree

#### Definition

In [25]:
// Recursively creates a balanced syntax tree
def nonassocPairwiseReduce[A](xs: List[A], op: (A, A) => A): A = {
  xs match {
    case Nil => throw new IllegalArgumentException
    case List(singleElem) => singleElem
    case sthElse => {
      val grouped = sthElse.grouped(2).toList
      val pairwiseOpd = for (g <- grouped) yield {
        g match {
          case List(a, b) => op(a, b)
          case List(x) => x
        }
      }
      nonassocPairwiseReduce(pairwiseOpd, op)
    }
  }
}

def checkParamsAdditiveRT(numAddends: Int, bitWidth: Int): Unit = {
    require(numAddends >= 1, "Number of addends must be at least one.")
    require(bitWidth >= 1, "Bitwidth must be at least one.")
}


class AdditiveRT(numAddends: Int, bitWidth: Int) extends Module {

    checkParamsAdditiveRT(numAddends, bitWidth)
    
    val io = IO(new Bundle {
        val in  = Input(Vec(numAddends, SInt(bitWidth.W)))
        val sum = Output(SInt(bitWidth.W))
    })
    
    io.sum := nonassocPairwiseReduce(io.in toList, (x: SInt, y: SInt) => x + y)
}

defined [32mfunction[39m [36mnonassocPairwiseReduce[39m
defined [32mfunction[39m [36mcheckParamsAdditiveRT[39m
defined [32mclass[39m [36mAdditiveRT[39m

#### Verilog

In [16]:
println(getVerilog(new AdditiveRT(4, 4)))
println(getVerilog(new AdditiveRT(6, 4)))

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.006] Done elaborating.
Total FIRRTL Compile Time: 129.9 ms

module cmd14WrapperHelperAdditiveRT( // @[:@3.2]
  input        clock, // @[:@4.4]
  input        reset, // @[:@5.4]
  input  [3:0] io_in_0, // @[:@6.4]
  input  [3:0] io_in_1, // @[:@6.4]
  input  [3:0] io_in_2, // @[:@6.4]
  input  [3:0] io_in_3, // @[:@6.4]
  output [3:0] io_out // @[:@6.4]
);
  wire [4:0] _T_12; // @[cmd14.sc 29:76:@8.4]
  wire [3:0] _T_13; // @[cmd14.sc 29:76:@9.4]
  wire [3:0] _T_14; // @[cmd14.sc 29:76:@10.4]
  wire [4:0] _T_15; // @[cmd14.sc 29:76:@11.4]
  wire [3:0] _T_16; // @[cmd14.sc 29:76:@12.4]
  wire [3:0] _T_17; // @[cmd14.sc 29:76:@13.4]
  wire [4:0] _T_18; // @[cmd14.sc 29:76:@14.4]
  wire [3:0] _T_19; // @[cmd14.sc 29:76:@15.4]
  wire [3:0] _T_20; // @[cmd14.sc 29:76:@16.4]
  assign _T_12 = $signed(io_in_0) + $signed(io_in_1); // @[cmd14.sc 29:76:@8.4]
  assign _T_13 = _T_12[3:0]; // @[cmd14.sc 29:76:@9.4]
  assign _T_14 = $sig

#### Verification

In [26]:
Driver(() => new AdditiveRT(4, 8)) {
    uut => new PeekPokeTester(uut) {
        poke(uut.io.in(0), 1) 
        poke(uut.io.in(1), 2)
        poke(uut.io.in(2), 8) 
        poke(uut.io.in(3), 9) 
        expect(uut.io.sum, 20)
        
        poke(uut.io.in(0), 1) 
        poke(uut.io.in(1), 2)
        poke(uut.io.in(2), 8) 
        poke(uut.io.in(3), 9) 
        expect(uut.io.sum, 20)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.006] Done elaborating.
Total FIRRTL Compile Time: 7.9 ms
Total FIRRTL Compile Time: 8.1 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532630396193
test cmd24WrapperHelperAdditiveRT Success: 2 tests passed in 5 cycles taking 0.002699 seconds
[[35minfo[0m] [0.004] RAN 0 CYCLES PASSED


[36mres25[39m: [32mBoolean[39m = [32mtrue[39m

### Putting them Together

#### Definition

In [31]:
def checkParamsIPU(width: Int, bitWidth: Int, bp: String) {
    val bpError = "Bypass must be \"None\" or \"Firm\""
    val widthError = "Width must be at least one"
    val bitWidthError = "Data bitwidth must be non-negative"
    
    val bpTypes = List("None", "Firm")
    
    require(width >= 1, widthError)
    require(bpTypes.contains(bp), bpError)
    require(bitWidth >= 0, bitWidthError)
}

class IPUInputs(width: Int, bitWidth: Int, bpFirm: Boolean) extends Bundle {
    val data = new PMultInputs(width, bitWidth)
    val sel  = if (bpFirm) Some(Input(Vec(width, Bool()))) else None
}

class IPUOutputs(bitWidth: Int, bpFirm: Boolean) extends Bundle {
    val innerProd = Output(SInt(bitWidth.W))
    
    val bpLeft  = if (bpFirm) Some(Output(SInt(bitWidth.W)))  else None
    val bpRight = if (bpFirm) Some(Output(SInt(bitWidth.W)))  else None
}


class IPU(width: Int, bitWidth: Int, bypassType: String) extends Module {
    
    checkParamsIPU(width, bitWidth, bypassType)
    
    val bpFirm = (bypassType == "Firm")
    
    val io = IO(new Bundle {
        val in  = new IPUInputs(width, bitWidth, bpFirm)
        val out = new IPUOutputs(bitWidth, bpFirm)
    })
    
    val pMult = Module(new PMult(width, bitWidth))
    pMult.io.in <> io.in.data
    
    val additiveRT = Module(new AdditiveRT(width, bitWidth))
    additiveRT.io.in := pMult.io.prod
    
    io.out.innerProd := additiveRT.io.sum
    
    if (bpFirm) {
        io.out.bpLeft.get  := PriorityMux(io.in.sel.get, io.in.data.left)
        io.out.bpRight.get := PriorityMux(io.in.sel.get, io.in.data.right)
    }
}

defined [32mfunction[39m [36mcheckParamsIPU[39m
defined [32mclass[39m [36mIPUInputs[39m
defined [32mclass[39m [36mIPUOutputs[39m
defined [32mclass[39m [36mIPU[39m

#### Verification

In [34]:
Driver(() => new IPU(width=4, bypassType="Firm", bitWidth=8)) {
    uut => new PeekPokeTester(uut) {
        
        poke(uut.io.in.sel.get(0), 0)
        poke(uut.io.in.sel.get(1), 0)
        poke(uut.io.in.sel.get(2), 0)
        poke(uut.io.in.sel.get(3), 0)
        
        poke(uut.io.in.data.left(0), 1)
        poke(uut.io.in.data.left(1), 2)
        poke(uut.io.in.data.left(2), 3)
        poke(uut.io.in.data.left(3), 4)
        
        poke(uut.io.in.data.right(0), 5)
        poke(uut.io.in.data.right(1), 6)
        poke(uut.io.in.data.right(2), 7)
        poke(uut.io.in.data.right(3), 8)
        
        expect(uut.io.out.innerProd, 70)
        
        poke(uut.io.in.sel.get(0), 0)
        poke(uut.io.in.sel.get(1), 1)
        poke(uut.io.in.sel.get(2), 0)
        poke(uut.io.in.sel.get(3), 0)
        
        expect(uut.io.out.bpLeft.get, 2)
        expect(uut.io.out.bpRight.get, 6)
        
        poke(uut.io.in.sel.get(0), 0)
        poke(uut.io.in.sel.get(1), 0)
        poke(uut.io.in.sel.get(2), 1)
        poke(uut.io.in.sel.get(3), 0)
        
        expect(uut.io.out.bpLeft.get, 3)
        expect(uut.io.out.bpRight.get, 7)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.008] Done elaborating.
Total FIRRTL Compile Time: 30.7 ms
Total FIRRTL Compile Time: 20.8 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532631762495
test cmd30WrapperHelperIPU Success: 5 tests passed in 5 cycles taking 0.009510 seconds
[[35minfo[0m] [0.004] RAN 0 CYCLES PASSED


[36mres33[39m: [32mBoolean[39m = [32mtrue[39m

## ALU

#### Definition

In [37]:
def checkparamsALU(datawidth: Int, funcs: List[String]) {
    val identityError = "ALU functions must explicitly include Identity."
    val functionError = "Unsupported Error"
    val supportedFuncs = List("Identity", "Add", "Max", "Accumulate")
    
    require(funcs.contains("Identity"), identityError)
    for(x <- funcs) { require(supportedFuncs.contains(x), functionError) }
}

class ALUInputs(dataWidth: Int, numFuncs: Int, accSupp: Boolean, addBypassIn: Boolean) extends Bundle {
    val innerProd = Input(SInt(dataWidth.W))
    val funcSel = Input(Vec(numFuncs, Bool()))
    
    val weightBp     = if(addBypassIn) Some(Input(SInt(dataWidth.W))) else None
    val actvtnBp     = if(addBypassIn) Some(Input(SInt(dataWidth.W))) else None
    val rfFeedback   = if(accSupp)     Some(Input(SInt(dataWidth.W))) else None
}

class ALU(dataWidth: Int, funcs: List[String]) extends Module {
    
    checkparamsALU(dataWidth, funcs)
    
    val addSupp = funcs.contains("Add")
    val maxSupp = funcs.contains("Max")
    val accSupp = funcs.contains("Accumulate")
    val addBypassIn = addSupp || maxSupp
 
    val io = IO(new Bundle {
        val in = new ALUInputs(dataWidth, funcs.length, accSupp, addBypassIn)
        val out = Output(SInt(dataWidth.W))
    })
    
    val idnOut = Some(Wire(SInt(dataWidth.W)))
    val addOut = if(addSupp) Some(Wire(SInt(dataWidth.W))) else None
    val maxOut = if(maxSupp) Some(Wire(SInt(dataWidth.W))) else None
    val accOut = if(accSupp) Some(Wire(SInt(dataWidth.W))) else None
    
    idnOut.get := io.in.innerProd
    
    if (addSupp) { addOut.get := io.in.weightBp.get + io.in.actvtnBp.get }
    if (accSupp) { accOut.get := io.in.innerProd + io.in.rfFeedback.get }
    if (maxSupp) {
        when (io.in.weightBp.get > io.in.actvtnBp.get) {
            maxOut.get := io.in.weightBp.get
        } .otherwise {
            maxOut.get := io.in.actvtnBp.get
        }
    }
    
    val inters = (idnOut :: addOut :: maxOut :: accOut :: Nil) filter ( _.isDefined ) map ( _.get )
    io.out := PriorityMux(io.in.funcSel, inters)
}

defined [32mfunction[39m [36mcheckparamsALU[39m
defined [32mclass[39m [36mALUInputs[39m
defined [32mclass[39m [36mALU[39m

#### Verification

In [41]:
val funcs = "Identity" :: "Add" :: "Max" :: "Accumulate" :: Nil

Driver(() => new ALU(8, funcs)) {
    uut => new PeekPokeTester(uut) {
        
        poke(uut.io.in.innerProd, 1)
        poke(uut.io.in.weightBp.get, 2)
        poke(uut.io.in.actvtnBp.get, 3)
        poke(uut.io.in.rfFeedback.get, 4)
        
        poke(uut.io.in.funcSel(0), 1)
        poke(uut.io.in.funcSel(1), 0)
        poke(uut.io.in.funcSel(2), 0)
        poke(uut.io.in.funcSel(3), 0)
        expect(uut.io.out, 1)
        
        poke(uut.io.in.funcSel(0), 0)
        poke(uut.io.in.funcSel(1), 1)
        poke(uut.io.in.funcSel(2), 0)
        poke(uut.io.in.funcSel(3), 0)
        expect(uut.io.out, 5)
        
        poke(uut.io.in.funcSel(0), 0)
        poke(uut.io.in.funcSel(1), 0)
        poke(uut.io.in.funcSel(2), 1)
        poke(uut.io.in.funcSel(3), 0)
        expect(uut.io.out, 3)
        
        poke(uut.io.in.funcSel(0), 0)
        poke(uut.io.in.funcSel(1), 0)
        poke(uut.io.in.funcSel(2), 0)
        poke(uut.io.in.funcSel(3), 1)
        expect(uut.io.out, 5)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.000] Done elaborating.
Total FIRRTL Compile Time: 10.9 ms
Total FIRRTL Compile Time: 9.6 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532633052133
test cmd36WrapperHelperALU Success: 4 tests passed in 5 cycles taking 0.003677 seconds
[[35minfo[0m] [0.005] RAN 0 CYCLES PASSED


[36mfuncs[39m: [32mList[39m[[32mString[39m] = [33mList[39m([32m"Identity"[39m, [32m"Add"[39m, [32m"Max"[39m, [32m"Accumulate"[39m)
[36mres40_1[39m: [32mBoolean[39m = [32mtrue[39m

## Nonlinear Unit

In [45]:
def checkparamsNLU(datawidth: Int, funcs: List[String]) {
    val supportedFuncs = List("Identity", "ReLu")
    val identityError = "NLU functions must explicitly include Identity."
    val functionError = "Unsupported Function"
    
    require(funcs.contains("Identity"), identityError)
    for(x <- funcs)(require(supportedFuncs.contains(x), functionError))
}

class NLUInputs(dataWidth: Int, reluSupp: Boolean) {
    val data = Input(SInt(dataWidth.W))
    val fSel = Input(Vec(funcs.length, Bool()))
}

class NLU(dataWidth: Int, funcs: List[String]) extends Module {
    
    checkparamsNLU(dataWidth, funcs)
    
    val reluSupp = funcs.contains("ReLu")
    
    val io = IO(new Bundle {
        val in  = new NLUInputs(dataWidth, reluSupp)
        val out = Output(SInt(dataWidth.W))
    })
    
    val idRes   = Some(Wire(SInt(dataWidth.W)))
    val reluRes = if(reluSupp) Some(Wire(SInt(dataWidth.W))) else None
    
    idRes.get := io.in.data
    
    if (reluSupp) {
        when (io.in.data > 0.S) {
            reluRes.get := io.data
        } .otherwise {
            reluRes.get := 0.S
        }
    }
    
    val inters = (idRes :: reluRes :: Nil) filter ( _.isDefined ) map ( _.get )
    io.out := PriorityMux(io.in.fSel, inters)
}

defined [32mfunction[39m [36mcheckparamsNLU[39m
defined [32mclass[39m [36mNLUInputs[39m
defined [32mclass[39m [36mNLU[39m

## Control

### State Machine

#### Definition

In [340]:
class StateMachine(numStates: Int, nextState: (UInt, UInt, Int) => UInt, ctrlWidth: Int) extends Module {
    
    val stateWidth: Int = log2Up(numStates)
    
    val io = IO(new Bundle {
        val control = Input (UInt(ctrlWidth.W ))
        val out     = Output(UInt(stateWidth.W))
    })
    
    val register = RegInit(0.U(stateWidth.W))
    register := nextState(register, io.control, ctrlWidth)
    io.out := register
}

defined [32mclass[39m [36mStateMachine[39m

#### Example

In [341]:
def stateMap(state: UInt, control: UInt, stateWidth: Int): UInt = {
    
    val nextState = Wire(UInt(stateWidth.W))
    
    when      (state === 0.U & control === 0.U) { nextState := 0.U }
    .elsewhen (state === 0.U & control === 1.U) { nextState := 1.U }
    .elsewhen (state === 1.U & control === 0.U) { nextState := 0.U }
    .elsewhen (state === 1.U & control === 1.U) { nextState := 1.U }
    .otherwise { nextState := 0.U }
    
    nextState
}

defined [32mfunction[39m [36mstateMap[39m

#### Verification

In [343]:
Driver(() => new StateMachine(2, stateMap, 4)) {
    uut => new PeekPokeTester(uut) {
        poke(uut.io.control, 0)
        expect(uut.io.out, 0)
        
        // 0 -> 1
        poke(uut.io.control, 1)
        step(1)
        expect(uut.io.out, 1)
        
        // 1 -> 1
        poke(uut.io.control, 1)
        step(1)
        expect(uut.io.out, 1)
        
        // 1 -> 0
        poke(uut.io.control, 0)
        step(1)
        expect(uut.io.out, 0)
        
        // 0 -> 0
        poke(uut.io.control, 0)
        step(1)
        expect(uut.io.out, 0)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.002] Done elaborating.
Total FIRRTL Compile Time: 10.1 ms
Total FIRRTL Compile Time: 12.5 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532561171036
test cmd339WrapperHelperStateMachine Success: 5 tests passed in 9 cycles taking 0.004398 seconds
[[35minfo[0m] [0.004] RAN 4 CYCLES PASSED


[36mres342[39m: [32mBoolean[39m = [32mtrue[39m

### Decoder

#### Definition

In [78]:
class Decoder(
    ports: Int,
    datawidth: Int,
    addrwidth: Int,
    statewidth: Int,
    aluFuncs: List[String], 
    nluFuncs: List[String],
    decode: (UInt, String) => Data)
  extends Module {
    
    val io = IO(new Bundle {
        
        val state = Input(UInt(statewidth.W))
        
        val weightPRFControl = Output(new PRFControl(ports, addrwidth))
        val actvtnPRFControl = Output(new PRFControl(ports, addrwidth))
        
        val ipuSelGet = Output(Vec(ports, Bool()))
        val aluFuncSel = Output(Vec(aluFuncs.length, Bool()))
        
        val intrnlPRFControl = Output(new PRFControl(ports, addrwidth))
        val intrnlPRFDataSel = Output(Bool())
        
        val nluFuncSel = Output(Vec(nluFuncs.length, Bool()))
        
    })
    
    // Refactor this to use bulk connections
    io.weightPRFControl.wEnable  := decode(io.state, "weightPRF wEnable")
    io.weightPRFControl.rEnable  := decode(io.state, "weightPRF rEnable")
    io.weightPRFControl.wAddr    := decode(io.state, "weightPRF wAddr")
    io.weightPRFControl.rAddrInt := decode(io.state, "weightPRF rAddrInt")
    io.weightPRFControl.rAddrExt := decode(io.state, "weightPRF rAddrExt")
    io.weightPRFControl.bpSelGet := decode(io.state, "weightPRF bpSelGet")
    
    io.actvtnPRFControl.wEnable  := decode(io.state, "actvtnPRF wEnable")
    io.actvtnPRFControl.rEnable  := decode(io.state, "actvtnPRF rEnable")
    io.actvtnPRFControl.wAddr    := decode(io.state, "actvtnPRF wAddr")
    io.actvtnPRFControl.rAddrInt := decode(io.state, "actvtnPRF rAddrInt")
    io.actvtnPRFControl.rAddrExt := decode(io.state, "actvtnPRF rAddrExt")
    io.actvtnPRFControl.bpSelGet := decode(io.state, "actvtnPRF bpSelGet")
    
    io.ipuSelGet  := decode(io.state, "ipuSelGet")
    io.aluFuncSel := decode(io.state, "aluFuncSel")
    
    io.intrnlPRFControl.wEnable  := decode(io.state, "intrnlPRF wEnable")
    io.intrnlPRFControl.rEnable  := decode(io.state, "intrnlPRF rEnable")
    io.intrnlPRFControl.wAddr    := decode(io.state, "intrnlPRF wAddr")
    io.intrnlPRFControl.rAddrInt := decode(io.state, "intrnlPRF rAddrInt")
    io.intrnlPRFControl.rAddrExt := decode(io.state, "intrnlPRF rAddrExt")
    io.intrnlPRFControl.bpSelGet := decode(io.state, "intrnlPRF bpSelGet")
    io.intrnlPRFDataSel          := decode(io.state, "intrnlPRF dataSel")
    
    io.nluFuncSel := decode(io.state, "nluFuncSel")
}

cmd78.sc:15: not enough arguments for constructor PRFControl: (ports: Int, addrWidth: Int, bpSoft: Boolean)cmd78Wrapper.this.cmd75.wrapper.PRFControl.
Unspecified value parameter bpSoft.
        val weightPRFControl = Output(new PRFControl(ports, addrwidth))
                                      ^cmd78.sc:16: not enough arguments for constructor PRFControl: (ports: Int, addrWidth: Int, bpSoft: Boolean)cmd78Wrapper.this.cmd75.wrapper.PRFControl.
Unspecified value parameter bpSoft.
        val actvtnPRFControl = Output(new PRFControl(ports, addrwidth))
                                      ^cmd78.sc:21: not enough arguments for constructor PRFControl: (ports: Int, addrWidth: Int, bpSoft: Boolean)cmd78Wrapper.this.cmd75.wrapper.PRFControl.
Unspecified value parameter bpSoft.
        val intrnlPRFControl = Output(new PRFControl(ports, addrwidth))
                                      ^

: 

#### Example

In [48]:
def decode(state: UInt, output: String): Data = {
    
    // Set Types
    val data = output match {
        case "weightPRF wEnable"  => Wire(Vec(2, Bool()))
        case "weightPRF rEnable"  => Wire(Vec(2, Bool()))
        case "weightPRF wAddr"    => Wire(Vec(2, UInt(8.W)))
        case "weightPRF rAddrInt" => Wire(Vec(2, UInt(8.W)))
        case "weightPRF rAddrExt" => Wire(Vec(2, UInt(8.W)))
        case "weightPRF bpSelGet" => Wire(Vec(2, Bool()))
        
        case "actvtnPRF wEnable"  => Wire(Vec(2, Bool()))
        case "actvtnPRF rEnable"  => Wire(Vec(2, Bool()))
        case "actvtnPRF wAddr"    => Wire(Vec(2, UInt(8.W)))
        case "actvtnPRF rAddrInt" => Wire(Vec(2, UInt(8.W)))
        case "actvtnPRF rAddrExt" => Wire(Vec(2, UInt(8.W)))
        case "actvtnPRF bpSelGet" => Wire(Vec(2, Bool()))
        
        case "ipuSelGet"         => Wire(Vec(2, Bool()))
        case "aluFuncSel"        => Wire(Vec(4, Bool()))
         
        case "intrnlPRF wEnable"  => Wire(Vec(2, Bool()))
        case "intrnlPRF rEnable"  => Wire(Vec(2, Bool()))
        case "intrnlPRF wAddr"    => Wire(Vec(2, UInt(8.W)))
        case "intrnlPRF rAddrInt" => Wire(Vec(2, UInt(8.W)))
        case "intrnlPRF rAddrExt" => Wire(Vec(2, UInt(8.W)))
        case "intrnlPRF bpSelGet" => Wire(Vec(2, Bool()))
        case "intrnlPRF dataSel"  => Wire(Bool())
        
        case "nluFuncSel"        => Wire(Vec(2, Bool()))
                                    
    }
    
    // Set Values
    when(state === 0.U) {
        data := { output match {
            case "weightPRF wEnable"  => Vec.fill(2){true.B}
            case "weightPRF rEnable"  => Vec.fill(2){true.B}
            case "weightPRF wAddr"    => Vec.fill(2){1.U}
            case "weightPRF rAddrInt" => Vec.fill(2){2.U}
            case "weightPRF rAddrExt" => Vec.fill(2){3.U}
            case "weightPRF bpSelGet" => Vec.fill(2){true.B}
            
            case "actvtnPRF wEnable"  => Vec.fill(2){true.B}
            case "actvtnPRF rEnable"  => Vec.fill(2){true.B}
            case "actvtnPRF wAddr"    => Vec.fill(2){1.U}
            case "actvtnPRF rAddrInt" => Vec.fill(2){2.U}
            case "actvtnPRF rAddrExt" => Vec.fill(2){3.U}
            case "actvtnPRF bpSelGet" => Vec.fill(2){true.B}
            
            case "ipuSelGet"         => Vec(1.U :: 0.U :: Nil)
            case "aluFuncSel"        => Vec(1.U :: 0.U :: 0.U :: 0.U :: Nil)
            
            case "intrnlPRF wEnable"  => Vec.fill(2){true.B}
            case "intrnlPRF rEnable"  => Vec.fill(2){true.B}
            case "intrnlPRF wAddr"    => Vec.fill(2){1.U}
            case "intrnlPRF rAddrInt" => Vec.fill(2){2.U}
            case "intrnlPRF rAddrExt" => Vec.fill(2){3.U}
            case "intrnlPRF bpSelGet" => Vec.fill(2){true.B}
            case "intrnlPRF dataSel"  => true.B
            
            case "nluFuncSel"        => Vec(1.U :: 0.U :: Nil)
        }}
    } 

    .otherwise {
        data := { output match {
            case "weightPRF wEnable"  => Vec.fill(2){false.B}
            case "weightPRF rEnable"  => Vec.fill(2){false.B}
            case "weightPRF wAddr"    => Vec.fill(2){4.U}
            case "weightPRF rAddrInt" => Vec.fill(2){5.U}
            case "weightPRF rAddrExt" => Vec.fill(2){6.U}
            case "weightPRF bpSelGet" => Vec.fill(2){false.B}
            
            case "actvtnPRF wEnable"  => Vec.fill(2){false.B}
            case "actvtnPRF rEnable"  => Vec.fill(2){false.B}
            case "actvtnPRF wAddr"    => Vec.fill(2){4.U}
            case "actvtnPRF rAddrInt" => Vec.fill(2){5.U}
            case "actvtnPRF rAddrExt" => Vec.fill(2){6.U}
            case "actvtnPRF bpSelGet" => Vec.fill(2){false.B}
            
            case "ipuSelGet"         => Vec(0.U :: 1.U :: Nil)
            case "aluFuncSel"        => Vec(0.U :: 1.U :: 0.U :: 0.U :: Nil)
            
            case "intrnlPRF wEnable"  => Vec.fill(2){false.B}
            case "intrnlPRF rEnable"  => Vec.fill(2){false.B}
            case "intrnlPRF wAddr"    => Vec.fill(2){4.U}
            case "intrnlPRF rAddrInt" => Vec.fill(2){5.U}
            case "intrnlPRF rAddrExt" => Vec.fill(2){6.U}
            case "intrnlPRF bpSelGet" => Vec.fill(2){false.B}
            case "intrnlPRF dataSel"  => false.B
            
            case "nluFuncSel"        => Vec(0.U :: 1.U :: Nil)
        }}
    }
    
    data
}

defined [32mfunction[39m [36mdecode[39m

#### Verification

In [49]:
Driver(() => new Decoder(decode, statewidth=4, ports=2,
                        datawidth=4, addrwidth=4,
                        aluFuncs=List("Identity", "Add", "Max", "Accumulate"),
                        nluFuncs=List("Identity", "ReLu"))) {
    
    uut => new PeekPokeTester(uut) {
        
        
        poke(uut.io.state, 0.U)
        step(1)
        
        expect(uut.io.weightPRFControl.wEnable(0), true.B)
        expect(uut.io.weightPRFControl.rEnable(0), true.B)
        expect(uut.io.weightPRFControl.wAddr(0), 1.U)
        expect(uut.io.weightPRFControl.rAddrInt(0), 2.U)
        expect(uut.io.weightPRFControl.rAddrExt(0), 3.U)
        expect(uut.io.weightPRFControl.bpSelGet(0), true.B)
        
        expect(uut.io.actvtnPRFControl.wEnable(0), true.B)
        expect(uut.io.actvtnPRFControl.rEnable(0), true.B)
        expect(uut.io.actvtnPRFControl.wAddr(0), 1.U)
        expect(uut.io.actvtnPRFControl.rAddrInt(0), 2.U)
        expect(uut.io.actvtnPRFControl.rAddrExt(0), 3.U)
        expect(uut.io.actvtnPRFControl.bpSelGet(0), true.B)
        
        expect(uut.io.ipuSelGet(0), 1)
        expect(uut.io.ipuSelGet(1), 0)
        
        expect(uut.io.aluFuncSel(0), 1)
        expect(uut.io.aluFuncSel(1), 0)
        expect(uut.io.aluFuncSel(2), 0)
        expect(uut.io.aluFuncSel(3), 0)
        
        expect(uut.io.intrnlPRFControl.wEnable(0), true.B)
        expect(uut.io.intrnlPRFControl.rEnable(0), true.B)
        expect(uut.io.intrnlPRFControl.wAddr(0), 1.U)
        expect(uut.io.intrnlPRFControl.rAddrInt(0), 2.U)
        expect(uut.io.intrnlPRFControl.rAddrExt(0), 3.U)
        expect(uut.io.intrnlPRFControl.bpSelGet(0), true.B)
        
        expect(uut.io.nluFuncSel(0), 1)
        expect(uut.io.nluFuncSel(1), 0)
        
        poke(uut.io.state, 1.U) 
        step(1)
        
        expect(uut.io.weightPRFControl.wEnable(0), false.B)
        expect(uut.io.weightPRFControl.rEnable(0), false.B)
        expect(uut.io.weightPRFControl.wAddr(0), 4.U)
        expect(uut.io.weightPRFControl.rAddrInt(0), 5.U)
        expect(uut.io.weightPRFControl.rAddrExt(0), 6.U)
        expect(uut.io.weightPRFControl.bpSelGet(0), false.B)
        
        expect(uut.io.actvtnPRFControl.wEnable(0), false.B)
        expect(uut.io.actvtnPRFControl.rEnable(0), false.B)
        expect(uut.io.actvtnPRFControl.wAddr(0), 4.U)
        expect(uut.io.actvtnPRFControl.rAddrInt(0), 5.U)
        expect(uut.io.actvtnPRFControl.rAddrExt(0), 6.U)
        expect(uut.io.actvtnPRFControl.bpSelGet(0), false.B)
        
        expect(uut.io.ipuSelGet(0), 0)
        expect(uut.io.ipuSelGet(1), 1)
        
        expect(uut.io.aluFuncSel(0), 0)
        expect(uut.io.aluFuncSel(1), 1)
        expect(uut.io.aluFuncSel(2), 0)
        expect(uut.io.aluFuncSel(3), 0)
        
        expect(uut.io.intrnlPRFControl.wEnable(0), false.B)
        expect(uut.io.intrnlPRFControl.rEnable(0), false.B)
        expect(uut.io.intrnlPRFControl.wAddr(0), 4.U)
        expect(uut.io.intrnlPRFControl.rAddrInt(0), 5.U)
        expect(uut.io.intrnlPRFControl.rAddrExt(0), 6.U)
        expect(uut.io.intrnlPRFControl.bpSelGet(0), false.B)
        
        expect(uut.io.nluFuncSel(0), 0)
        expect(uut.io.nluFuncSel(1), 1)
        
    }
}


[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.044] Done elaborating.
Total FIRRTL Compile Time: 76.8 ms
Total FIRRTL Compile Time: 62.9 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532634311657
test cmd45WrapperHelperDecoder Success: 52 tests passed in 7 cycles taking 0.033776 seconds
[[35minfo[0m] [0.025] RAN 2 CYCLES PASSED


[36mres48[39m: [32mBoolean[39m = [32mtrue[39m

## PE

#### Definition

In [None]:
class nPE(stateMap: Map[(UInt, UInt), UInt], extrnl_ctrl_width: Int, // State Machine
          decode: (UInt, String) => Data, RFports: Int, weightRFBP: String, actvtnRFBP: String, datawidth: Int, addrwidth: Int,
          aluFuncs: List[String], nluFuncs: List[String], intrnlRFBP: String
         ) extends Module {
    
    val io = IO(new Bundle {
        val extrnl_ctrl   = Input (SInt(extrnl_ctrl_width.W))
        val weightRF_in   = Input (Vec(RFports, SInt(datawidth.W)))
        val actvtnRF_in   = Input (Vec(RFports, SInt(datawidth.W)))
        val intrnlRF_in   = Input (SInt(datawidth.W))
        val weightRF_2NoC = Output(Vec(RFports, SInt(datawidth.W)))
        val actvtnRF_2NoC = Output(Vec(RFports, SInt(datawidth.W)))
        val intrnlRF_2NoC = Output(SInt(datawidth.W))
        val output        = Output(SInt(datawidth.W))
    })
    
    val stateMachine = new StateMachine(stateMap, extrnl_ctrl_width)
    stateMachine.io.control := io.extrnl_ctrl
    
    val decoder = new Decoder(decode, log2Up(stateMap.size), 
                              RFports, datawidth, addrwidth, aluFuncs, nluFuncs)
    decoder.io.state := stateMachine.io.state
    
    
    // Weight RF
    val weightRF = new pRF(RFports, weightRFBP, datawidth, addrwidth)
    
    // Mandatory Control
    weightRF.io.in.wEnable
    weightRF.io.in.rEnable
    weightRF.io.in.wAddr
    weightRF.io.in.rAddrInt
    weightRF.io.in.rAddrExt
    
    
    weightRF.io.write_en    := decoder.io.weightRF_wen
    weightRF.io.read_en     := decoder.io.weightRF_ren
    weightRF.io.waddr       := decoder.io.weightRF_waddr
    weightRF.io.raddr_int   := decoder.io.weightRF_raddr_int
    weightRF.io.raddr_ext   := decoder.io.weightRF_raddr_ext
    
    // Optional Control
    if ( weightRF.io.bp_slct.isDefined ) { weightRF.io.bp_slct.get := decoder.io.weightRF_bp_slct_get }
    
    // Mandatory Outputs
    weightRF.io.wdata := io.weightRF_in
    
    // Optional Outputs
    io.weightRF_2NoC  := weightRF.io.rdata_ext
    
    // Activation RF
    val actvtnRF = new pRF(RFports, actvtnRFBP, datawidth, addrwidth)
    
    // Mandatory Control
    actvtnRF.io.write_en    := decoder.io.actvtnRF_wen
    actvtnRF.io.read_en     := decoder.io.actvtnRF_ren
    actvtnRF.io.waddr       := decoder.io.actvtnRF_waddr
    actvtnRF.io.raddr_int   := decoder.io.actvtnRF_raddr_int
    actvtnRF.io.raddr_ext   := decoder.io.actvtnRF_raddr_ext
    
    // Optional Control
    if ( actvtnRF.io.bp_slct.isDefined ) { actvtnRF.io.bp_slct.get := decoder.io.actvtnRF_bp_slct_get }
    
    // Mandatory Outputs
    actvtnRF.io.wdata := io.weightRF_in
    
    // Optional Outputs
    io.actvtnRF_2NoC     := actvtnRF.io.rdata_ext
       
    val ipuBP = if(aluFuncs.contains("Add") || aluFuncs.contains("Max")) "Firm" else "None" 
    val ipu   = new IPU(RFports, ipuBP, datawidth)
    if (ipu.io.sel.isDefined) { ipu.io.sel.get := decoder.io.ipu_sel_get }
    ipu.io.in1 := weightRF.io.rdata_int
    ipu.io.in2 := actvtnRF.io.rdata_int
    
    val alu = new ALU(aluFuncs, datawidth)
    alu.io.func_slct := decoder.io.alu_func_slct
    alu.io.innr_prod := ipu.io.out
    if(alu.io.weight_bp.isDefined) alu.io.weight_bp.get := ipu.io.bp1.get
    if(alu.io.actvtn_bp.isDefined) alu.io.actvtn_bp.get := ipu.io.bp2.get
    
    val intrnlRF = new pRF(1, intrnlRFBP, datawidth, addrwidth)
    intrnlRF.io.write_en  := decoder.io.intrnlRF_write_en
    intrnlRF.io.read_en   := decoder.io.intrnlRF_read_en
    intrnlRF.io.waddr     := decoder.io.intrnlRF_waddr
    intrnlRF.io.raddr_int := decoder.io.intrnlRF_raddr_int
    intrnlRF.io.raddr_ext := decoder.io.intrnlRF_raddr_ext
    if (intrnlRF.io.bp_slct.isDefined) { intrnlRF.io.bp_slct.get := decoder.io.intrnlRF_bp_slct_get }
    intrnlRF.io.wdata := Mux(decoder.io.intrnlRF_wdata_slct, alu.io.output, io.intrnlRF_in)
    io.intrnlRF_2NoC := intrnlRF.io.rdata_ext
    if(alu.io.rf_feedbk.isDefined) alu.io.rf_feedbk.get := intrnlRF.io.rdata_int
    
    val nlu = new NonlinearUnit(nluFuncs, datawidth)
    nlu.io.fslct := decoder.io.nlu_func_slct
    nlu.io.input     := intrnlRF.io.rdata_int
    io.output        := nlu.io.outpt
    
    // Woot woot
}

#### Verification

## Future Plans
* Verify everything using Golden Models