# nPE: A Configurable Processing Engine
#### Verification | Version 0.5.1 | Updated 2018.7.26
___

## Setup

In [1]:
// Absolute location of the dependency script, resolved against the JVM working directory.
val path = System.getProperty("user.dir") + "/source/load-ivy.sc"
// Load that script into the running interpreter session as a module.
// NOTE(review): presumably this is what puts chisel3 on the classpath — confirm in load-ivy.sc.
interp.load.module(ammonite.ops.Path(java.nio.file.FileSystems.getDefault().getPath(path)))

[36mpath[39m: [32mString[39m = [32m"""
C:\Users\RyanL\OneDrive\Research\SEAL\processing-engine/source/load-ivy.sc
"""[39m

In [2]:
import chisel3._
import chisel3.util._
import chisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

import scala.math.pow

[32mimport [39m[36mchisel3._
[39m
[32mimport [39m[36mchisel3.util._
[39m
[32mimport [39m[36mchisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

[39m
[32mimport [39m[36mscala.math.pow[39m

## Parallel Register File

### Single Register File

#### Definition

In [42]:
/** Static parameters of a single register file.
  *
  * @param addrWidth width in bits of the write/read address ports
  * @param dataWidth width in bits of the signed data ports
  * @param bpSupport whether the control bundle carries a bypass-select signal
  */
class RFConfig(val addrWidth: Int, val dataWidth: Int, val bpSupport: Boolean)

/** Control signals for one register file.
  *
  * @param addrWidth width in bits of the three address fields
  * @param bpSupp    when true the bundle includes the optional `bpSel` field
  */
class RFControl(addrWidth: Int, bpSupp: Boolean) extends Bundle {
    
    // Rebuild the bundle from its constructor arguments so it can be cloned.
    override def cloneType = (new RFControl(addrWidth, bpSupp)).asInstanceOf[this.type]
    
    val wEnable  = Bool()            // write enable
    val rEnable  = Bool()            // read enable
    val wAddr    = UInt(addrWidth.W) // write address
    val rAddrInt = UInt(addrWidth.W) // read address for the "int" output
    val rAddrExt = UInt(addrWidth.W) // read address for the "ext" output
    // Bypass select; only present when bypass support is configured.
    val bpSel    = if (bpSupp) Some(Bool()) else None
}

/** The two signed read outputs of a register file.
  *
  * @param dataWidth width in bits of each output
  */
class RFOutputs(dataWidth: Int) extends Bundle {
    
    // Rebuild the bundle from its constructor argument so it can be cloned.
    override def cloneType = (new RFOutputs(dataWidth)).asInstanceOf[this.type]
    
    val int = SInt(dataWidth.W) // value read through rAddrInt
    val ext = SInt(dataWidth.W) // value read through rAddrExt
}

/** Register file with one write port and two read ports ("int" and "ext").
  *
  * Writes are registered: `dataIn` is stored at `wAddr` on the clock edge when
  * `wEnable` is high. Reads are combinational: while `rEnable` is high each
  * output shows the register selected by its read address, or `dataIn` itself
  * when the optional bypass select is asserted. While `rEnable` is low both
  * outputs are driven to zero.
  *
  * @param config address width, data width and bypass support of this file
  */
class RF(config: RFConfig) extends Module {
  
    val aw = config.addrWidth
    val dw = config.dataWidth
    val bp = config.bpSupport
    
    val io = IO(new Bundle {
        val control = Input(new RFControl(aw, bp))
        val dataIn  = Input(SInt(dw.W))
        val dataOut = Output(new RFOutputs(dw))
    })
    
    // 2^aw storage entries, each reset to zero. The reset literal fixes the
    // element width, so it must be the DATA width: sizing it with the address
    // width would silently truncate stored values whenever dw > aw.
    val registers = RegInit(Vec(Seq.fill(1 << aw) { 0.S(dw.W) }))
    
    when (io.control.wEnable) {
        registers(io.control.wAddr) := io.dataIn
    }
    
    when (io.control.rEnable) {
        // Without bypass support there is no bpSel field; treat it as never asserted.
        when(io.control.bpSel.getOrElse(false.B)) {
            io.dataOut.int := io.dataIn
            io.dataOut.ext := io.dataIn
        } .otherwise {
            io.dataOut.int := registers(io.control.rAddrInt)
            io.dataOut.ext := registers(io.control.rAddrExt)
        }
    } .otherwise {
        io.dataOut.int := 0.S
        io.dataOut.ext := 0.S
    }
}

defined [32mclass[39m [36mRFConfig[39m
defined [32mclass[39m [36mRFControl[39m
defined [32mclass[39m [36mRFOutputs[39m
defined [32mclass[39m [36mRF[39m

#### Verification

In [43]:
// Register file under test: 4-bit addresses, 8-bit data, bypass select present.
val rfCon = new RFConfig(addrWidth = 4, dataWidth = 8, bpSupport = true)

Driver(() => new RF(rfCon)) {
    uut => new PeekPokeTester(uut) {
         
        // Bypass off; write and read enables stay high for the whole test.
        poke(uut.io.control.bpSel.get, false)
        poke(uut.io.control.wEnable, true)   
        poke(uut.io.control.rEnable, true)

        // Write 1 to address 1; the step clocks it in.
        poke(uut.io.control.wAddr, 1)
        poke(uut.io.dataIn, 1)
        
        step(1)
        
        // Both read ports see the stored value.
        poke(uut.io.control.rAddrInt, 1)
        expect(uut.io.dataOut.int, 1)
        
        poke(uut.io.control.rAddrExt, 1)
        expect(uut.io.dataOut.ext, 1)
        
        // Write 2 to address 2.
        poke(uut.io.control.wAddr, 2)
        poke(uut.io.dataIn, 2)
        
        step(1)
        
        // The two ports read different addresses at the same time.
        poke(uut.io.control.rAddrInt, 1)
        expect(uut.io.dataOut.int, 1)
        
        poke(uut.io.control.rAddrExt, 2)
        expect(uut.io.dataOut.ext, 2)
        
        // Write 3 to address 3.
        poke(uut.io.control.wAddr, 3)
        poke(uut.io.dataIn, 3)
        
        step(1)
        
        // Earlier entries are unchanged and the new one is readable.
        poke(uut.io.control.rAddrInt, 1)
        expect(uut.io.dataOut.int, 1)
        
        poke(uut.io.control.rAddrExt, 2)
        expect(uut.io.dataOut.ext, 2)
        
        poke(uut.io.control.rAddrInt, 3)
        expect(uut.io.dataOut.int, 3)
        
        // Write inputs are unchanged here, so this step stores 3 at address 3 again.
        step(1)
        
        // Bypass on: both outputs follow dataIn with no clock step in between.
        poke(uut.io.control.bpSel.get, true)
        poke(uut.io.dataIn, 10)
        expect(uut.io.dataOut.int, 10)
        expect(uut.io.dataOut.ext, 10)
        
        // Bypass off: outputs return to the addressed registers (int -> 3, ext -> 2).
        poke(uut.io.control.bpSel.get, false)
        expect(uut.io.dataOut.int, 3)
        expect(uut.io.dataOut.ext, 2)
        
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.010] Done elaborating.
Total FIRRTL Compile Time: 38.3 ms
Total FIRRTL Compile Time: 27.2 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532723136851
test cmd41WrapperHelperRF Success: 11 tests passed in 9 cycles taking 0.019763 seconds
[[35minfo[0m] [0.018] RAN 4 CYCLES PASSED


[36mrfCon[39m: [32mRFConfig[39m = $sess.cmd41Wrapper$Helper$RFConfig@5b4daca8
[36mres42_1[39m: [32mBoolean[39m = [32mtrue[39m

### Putting them Together

#### Definition

In [44]:
/** Static parameters of a parallel register file (one register file per port).
  *
  * @param ports     number of parallel register files
  * @param dataWidth width in bits of each data port
  * @param addrWidth width in bits of each address port
  * @param bpType    bypass flavour: "None", "Soft" or "Hard"
  * @throws IllegalArgumentException if `bpType` is not one of the supported values
  */
class PRFConfig(
        val ports: Int,
        val dataWidth: Int,
        val addrWidth: Int,
        val bpType: String) {
    
    private val supportedBp = List("None", "Soft", "Hard")
    
    // Fail with a descriptive message instead of a bare "requirement failed".
    require(
        supportedBp.contains(bpType),
        "Bypass must be \"None\", \"Soft\" or \"Hard\"")
    
    val bpNone = (bpType == "None")
    val bpSoft = (bpType == "Soft")
    val bpHard = (bpType == "Hard")
    
    // Only the "Soft" flavour gives the per-port register files a bypass-select signal.
    val rfConfig = new RFConfig(addrWidth, dataWidth, bpSoft)
}

/** Control bundle for a parallel register file: one [[RFControl]] per port.
  *
  * @param ports     number of register files
  * @param addrWidth address width forwarded to every RFControl
  * @param bpSoft    whether every RFControl carries a bypass-select field
  */
class PRFControl(ports: Int, addrWidth: Int, bpSoft: Boolean) extends Bundle {
    
    // Rebuild the bundle from its constructor arguments so it can be cloned.
    override def cloneType = (new PRFControl(ports, addrWidth, bpSoft)).asInstanceOf[this.type]
    
    val rf = Vec(ports, new RFControl(addrWidth, bpSoft))
}

/** Parallel register file: `ports` independent lanes sharing one configuration.
  *
  * For bypass types "None" and "Soft" every lane is backed by its own RF
  * instance wired to the matching control, data-in and data-out slots. For
  * bypass type "Hard" no storage is built and each lane's input is wired
  * straight to both of its outputs.
  *
  * @param config port count, widths and bypass flavour
  */
class PRF(config: PRFConfig) extends Module {
    
    val pt = config.ports
    val aw = config.addrWidth
    val dw = config.dataWidth
    
    val rfConfig = config.rfConfig

    val bpNone = config.bpNone
    val bpSoft = config.bpSoft
    val bpHard = config.bpHard
    
    val io = IO(new Bundle {
        val control = Input(new PRFControl(pt, aw, bpSoft))
        val dataIn = Input(Vec(pt, SInt(dw.W)))
        val dataOut = Output(Vec(pt, new RFOutputs(dw)))
    })
    
    if (bpNone || bpSoft) {
        // Instantiate every lane first, then wire lane i to slot i of each port vector.
        val lanes = Seq.fill(pt){ Module(new RF(rfConfig)) }
        
        for ((lane, i) <- lanes.zipWithIndex) {
            lane.io.control <> io.control.rf(i)
            lane.io.dataIn := io.dataIn(i)
            
            io.dataOut(i) <> lane.io.dataOut
        }
        
    } else if (bpHard) {
        // Hard bypass: the input feeds both outputs directly.
        (io.dataOut zip io.dataIn).foreach { case (out, in) =>
            out.int := in
            out.ext := in
        }
    }
}

defined [32mclass[39m [36mPRFConfig[39m
defined [32mclass[39m [36mPRFControl[39m
defined [32mclass[39m [36mPRF[39m

#### Verification

In [45]:
// Two-lane parallel register file with soft bypass (each lane has its own bpSel).
val prfCon = new PRFConfig(
    ports = 2,
    addrWidth = 4,
    dataWidth = 8,
    bpType = "Soft")

Driver(() => new PRF(prfCon)) {
    uut => new PeekPokeTester(uut) {
         
        // Both lanes: write and read enabled, bypass off.
        poke(uut.io.control.rf(0).wEnable, true)  
        poke(uut.io.control.rf(1).wEnable, true) 
        poke(uut.io.control.rf(0).rEnable, true)
        poke(uut.io.control.rf(1).rEnable, true)
        poke(uut.io.control.rf(0).bpSel.get, false)
        poke(uut.io.control.rf(1).bpSel.get, false)

        // Write 1 to address 1 in both lanes; the step clocks it in.
        poke(uut.io.control.rf(0).wAddr, 1)
        poke(uut.io.control.rf(1).wAddr, 1)
        poke(uut.io.dataIn(0), 1)
        poke(uut.io.dataIn(1), 1)
        
        step(1)
        
        // Read
        poke(uut.io.control.rf(0).rAddrInt, 1)
        poke(uut.io.control.rf(1).rAddrInt, 1)
        expect(uut.io.dataOut(0).int, 1)
        expect(uut.io.dataOut(1).int, 1)
        
        poke(uut.io.control.rf(0).rAddrExt, 1)
        poke(uut.io.control.rf(1).rAddrExt, 1)
        expect(uut.io.dataOut(0).ext, 1)
        expect(uut.io.dataOut(1).ext, 1)
        
        // Write
        poke(uut.io.control.rf(0).wAddr, 2)
        poke(uut.io.control.rf(1).wAddr, 2)
        poke(uut.io.dataIn(0), 2)
        poke(uut.io.dataIn(1), 2)
        
        step(1)
        
        // Read
        poke(uut.io.control.rf(0).rAddrInt, 1)
        poke(uut.io.control.rf(1).rAddrInt, 1)
        expect(uut.io.dataOut(0).int, 1)
        expect(uut.io.dataOut(1).int, 1)
        
        poke(uut.io.control.rf(0).rAddrExt, 2)
        poke(uut.io.control.rf(1).rAddrExt, 2)
        expect(uut.io.dataOut(0).ext, 2)
        expect(uut.io.dataOut(1).ext, 2)
        
        // Write
        poke(uut.io.control.rf(0).wAddr, 3)
        poke(uut.io.control.rf(1).wAddr, 3)
        poke(uut.io.dataIn(0), 3)
        poke(uut.io.dataIn(1), 3)
        
        step(1)
        
        // Read
        poke(uut.io.control.rf(0).rAddrInt, 1)
        poke(uut.io.control.rf(1).rAddrInt, 1)
        expect(uut.io.dataOut(0).int, 1)
        expect(uut.io.dataOut(1).int, 1)
        
        poke(uut.io.control.rf(0).rAddrExt, 2)
        poke(uut.io.control.rf(1).rAddrExt, 2)
        expect(uut.io.dataOut(0).ext, 2)
        expect(uut.io.dataOut(1).ext, 2)
        
        poke(uut.io.control.rf(0).rAddrInt, 3)
        poke(uut.io.control.rf(1).rAddrInt, 3)
        expect(uut.io.dataOut(0).int, 3)
        expect(uut.io.dataOut(1).int, 3)
        
        // Bypass
        // Only lane 0 is bypassed: it forwards dataIn (10), while lane 1 keeps
        // returning its addressed registers (int -> address 3, ext -> address 2).
        poke(uut.io.control.rf(0).bpSel.get, true)
        poke(uut.io.control.rf(1).bpSel.get, false)
        poke(uut.io.dataIn(0), 10)
        poke(uut.io.dataIn(1), 10)
        expect(uut.io.dataOut(0).int, 10)
        expect(uut.io.dataOut(1).int, 3)
        expect(uut.io.dataOut(0).ext, 10)
        expect(uut.io.dataOut(1).ext, 2)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.014] Done elaborating.
Total FIRRTL Compile Time: 49.3 ms
Total FIRRTL Compile Time: 40.3 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532723141424
test cmd43WrapperHelperPRF Success: 18 tests passed in 8 cycles taking 0.036353 seconds
[[35minfo[0m] [0.033] RAN 3 CYCLES PASSED


[36mprfCon[39m: [32mPRFConfig[39m = $sess.cmd43Wrapper$Helper$PRFConfig@5d5b31e1
[36mres44_1[39m: [32mBoolean[39m = [32mtrue[39m

## Inner Product Unit

### Parallel Multiplier

#### Definition

In [46]:
/** Static parameters of the parallel multiplier.
  *
  * @param numPairs number of (weight, activation) pairs multiplied in parallel
  * @param bitWidth width in bits of every operand and product port
  * @throws IllegalArgumentException if either parameter is smaller than one
  */
class PMultConfig(val numPairs: Int, val bitWidth: Int) {
    // The pair count is validated first, so it is the error reported when both are invalid.
    require(numPairs > 0, "Must have at least one pair of multiplicands.")
    require(bitWidth > 0, "Bitwidth must be at least one.")
}

/** Operand bundle for the parallel multiplier: two equally sized vectors of signed values.
  *
  * @param numPairs number of elements in each vector
  * @param bitWidth width in bits of every element
  */
class PMultInput(numPairs: Int, bitWidth: Int) extends Bundle {
    
    // Rebuild the bundle from its constructor arguments so it can be cloned.
    override def cloneType = (new PMultInput(numPairs, bitWidth)).asInstanceOf[this.type]
    
    val weight = Vec(numPairs, SInt(bitWidth.W)) // first operand of each pair
    val actvtn = Vec(numPairs, SInt(bitWidth.W)) // second operand of each pair
}

/** Element-wise signed multiplier: `prod(i) = weight(i) * actvtn(i)` for every pair.
  *
  * Purely combinational; there is no register between inputs and outputs.
  * NOTE(review): each product port is declared `bitWidth` bits wide, the same
  * as the operands, so a full-width product is presumably narrowed on
  * assignment — confirm that dropping the upper bits is intended.
  *
  * @param config number of pairs and operand bit width
  */
class PMult(config: PMultConfig) extends Module {
    
    val np = config.numPairs
    val bw = config.bitWidth
    
    val io = IO(new Bundle {
        val in = Input(new PMultInput(np, bw))
        val prod = Output(Vec(np, SInt(bw.W)))
    })
    
    // One multiplier per lane, in lane order.
    for (lane <- 0 until np) {
        io.prod(lane) := io.in.weight(lane) * io.in.actvtn(lane)
    }
}

defined [32mclass[39m [36mPMultConfig[39m
defined [32mclass[39m [36mPMultInput[39m
defined [32mclass[39m [36mPMult[39m

#### Verification

In [47]:
// Four 8-bit multiplier lanes.
val pMultCon = new PMultConfig(numPairs = 4, bitWidth = 8)

Driver(() => new PMult(pMultCon)) {
    uut => new PeekPokeTester(uut) {
        // One row per lane: (weight, activation, expected product).
        val vectors = Seq((1, 2, 2), (3, 4, 12), (5, 6, 30), (7, 8, 56))
        
        // Drive every lane first ...
        vectors.zipWithIndex.foreach { case ((weight, actvtn, _), lane) =>
            poke(uut.io.in.weight(lane), weight)
            poke(uut.io.in.actvtn(lane), actvtn)
        }
        
        // ... then check all products; the design is combinational, so no step is needed.
        vectors.zipWithIndex.foreach { case ((_, _, product), lane) =>
            expect(uut.io.prod(lane), product)
        }
  }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.004] Done elaborating.
Total FIRRTL Compile Time: 7.8 ms
Total FIRRTL Compile Time: 6.7 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532723146968
test cmd45WrapperHelperPMult Success: 4 tests passed in 5 cycles taking 0.003905 seconds
[[35minfo[0m] [0.002] RAN 0 CYCLES PASSED


[36mpMultCon[39m: [32mPMultConfig[39m = $sess.cmd45Wrapper$Helper$PMultConfig@2e5c82e9
[36mres46_1[39m: [32mBoolean[39m = [32mtrue[39m

### Additive Reduction Tree

#### Definition

In [48]:
/** Static parameters of the additive reduction tree.
  *
  * @param numAddends number of values summed by the tree
  * @param bitWidth   width in bits of every addend and of the sum
  * @throws IllegalArgumentException if either parameter is smaller than one
  */
class AdditiveRTConfig(val numAddends: Int, val bitWidth: Int) {
    // The addend count is validated first, so it is the error reported when both are invalid.
    require(numAddends > 0, "Number of addends must be at least one.")
    require(bitWidth > 0, "Bitwidth must be at least one.")
}

/** Reduces `xs` with `op` by combining adjacent elements pairwise, level by
  * level, which yields a balanced expression tree rather than a linear chain.
  *
  * At each level neighbours are paired left to right as `op(left, right)`;
  * when the level has an odd number of elements the last one is carried to
  * the next level unchanged. Every level roughly halves the list, so the
  * recursion depth is logarithmic in the input length.
  *
  * @param xs non-empty list of values to combine
  * @param op binary combining function
  * @return the single value left after all levels have been combined
  * @throws IllegalArgumentException if `xs` is empty
  */
def adjReduce[A](xs: List[A], op: (A, A) => A): A = xs match {
    case Nil =>
        throw new IllegalArgumentException("adjReduce requires at least one element")
    case single :: Nil => single
    case _ =>
        // grouped(2) yields groups of two (plus possibly a trailing singleton);
        // reduce applies op to a pair and returns a singleton untouched.
        val nextLevel = xs.grouped(2).map(_.reduce(op)).toList
        adjReduce(nextLevel, op)
}

/** Combinational adder tree that sums all inputs into a single signed value.
  *
  * The additions are arranged by `adjReduce`, i.e. adjacent inputs are added
  * pairwise level by level. The sum port has the same bit width as the inputs.
  *
  * @param config number of addends and their bit width
  */
class AdditiveRT(config: AdditiveRTConfig) extends Module {

    val na = config.numAddends
    val bw = config.bitWidth
    
    val io = IO(new Bundle {
        val in  = Input(Vec(na, SInt(bw.W)))
        val sum = Output(SInt(bw.W))
    })
    
    // Plain method-call syntax instead of the postfix form `io.in toList`,
    // which needs the postfixOps language feature.
    io.sum := adjReduce(io.in.toList, (x: SInt, y: SInt) => x + y)
}

defined [32mclass[39m [36mAdditiveRTConfig[39m
defined [32mfunction[39m [36madjReduce[39m
defined [32mclass[39m [36mAdditiveRT[39m

#### Verilog

In [49]:
// Four 8-bit addends; this configuration is reused by the verification cell below.
val additiveRTCon = new AdditiveRTConfig(numAddends = 4, bitWidth = 8)
// Print the Verilog generated for the tree.
// NOTE(review): getVerilog is not defined in this notebook — presumably it comes from load-ivy.sc; confirm.
println(getVerilog(new AdditiveRT(additiveRTCon)))

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.003] Done elaborating.
Total FIRRTL Compile Time: 10.0 ms

module cmd47WrapperHelperAdditiveRT( // @[:@3.2]
  input        clock, // @[:@4.4]
  input        reset, // @[:@5.4]
  input  [7:0] io_in_0, // @[:@6.4]
  input  [7:0] io_in_1, // @[:@6.4]
  input  [7:0] io_in_2, // @[:@6.4]
  input  [7:0] io_in_3, // @[:@6.4]
  output [7:0] io_sum // @[:@6.4]
);
  wire [8:0] _T_12; // @[cmd47.sc 32:63:@8.4]
  wire [7:0] _T_13; // @[cmd47.sc 32:63:@9.4]
  wire [7:0] _T_14; // @[cmd47.sc 32:63:@10.4]
  wire [8:0] _T_15; // @[cmd47.sc 32:63:@11.4]
  wire [7:0] _T_16; // @[cmd47.sc 32:63:@12.4]
  wire [7:0] _T_17; // @[cmd47.sc 32:63:@13.4]
  wire [8:0] _T_18; // @[cmd47.sc 32:63:@14.4]
  wire [7:0] _T_19; // @[cmd47.sc 32:63:@15.4]
  wire [7:0] _T_20; // @[cmd47.sc 32:63:@16.4]
  assign _T_12 = $signed(io_in_0) + $signed(io_in_1); // @[cmd47.sc 32:63:@8.4]
  assign _T_13 = _T_12[7:0]; // @[cmd47.sc 32:63:@9.4]
  assign _T_14 = $sign

[36madditiveRTCon[39m: [32mAdditiveRTConfig[39m = $sess.cmd47Wrapper$Helper$AdditiveRTConfig@54723759

#### Verification

In [50]:
// Checks the adder tree on two different input vectors. The design is
// combinational, so no clock step is needed between poke and expect.
Driver(() => new AdditiveRT(additiveRTCon)) {
    uut => new PeekPokeTester(uut) {
        // All-positive addends: 1 + 2 + 8 + 9 = 20.
        poke(uut.io.in(0), 1) 
        poke(uut.io.in(1), 2)
        poke(uut.io.in(2), 8) 
        poke(uut.io.in(3), 9) 
        expect(uut.io.sum, 20)
        
        // Mixed-sign addends (previously an exact copy of the first vector,
        // which added no coverage): -1 + 2 - 8 + 9 = 2.
        poke(uut.io.in(0), -1) 
        poke(uut.io.in(1), 2)
        poke(uut.io.in(2), -8) 
        poke(uut.io.in(3), 9) 
        expect(uut.io.sum, 2)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.001] Done elaborating.
Total FIRRTL Compile Time: 5.9 ms
Total FIRRTL Compile Time: 5.3 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532723153072
test cmd47WrapperHelperAdditiveRT Success: 2 tests passed in 5 cycles taking 0.001970 seconds
[[35minfo[0m] [0.001] RAN 0 CYCLES PASSED


[36mres49[39m: [32mBoolean[39m = [32mtrue[39m

### Putting them Together

#### Definition

In [51]:
/** Static parameters of the inner product unit (parallel multiplier + adder tree).
  *
  * @param width    number of (weight, activation) pairs
  * @param bitWidth width in bits of every data signal
  * @param bpType   bypass flavour: "None" or "Firm"
  * @throws IllegalArgumentException if `width` or `bitWidth` is smaller than
  *         one, or `bpType` is not a supported value
  */
class IPUConfig(val width: Int, val bitWidth: Int, val bpType: String) {
    
    private val bypssError = "Bypass must be \"None\" or \"Firm\""
    private val widthError = "Width must be at least one"
    private val bitWdError = "Data bitwidth must be at least one"
    
    val supportedBp = List("None", "Firm")
    
    require(width >= 1, widthError)
    require(supportedBp.contains(bpType), bypssError)
    // The sub-configurations built below reject a bit width of zero, so reject
    // it here too and report it with this class's own message.
    require(bitWidth >= 1, bitWdError)
    
    val pMultConfig = new PMultConfig(numPairs = width, bitWidth = bitWidth)
    
    val additiveRTConfig = new AdditiveRTConfig(
        numAddends = width, bitWidth = bitWidth)
    
    val bpFirm = (bpType == "Firm")
}

/** Output bundle of the inner product unit.
  *
  * @param bitWidth width in bits of every field
  * @param bpFirm   when true the bundle also carries the bypassed weight and activation
  */
class IPUOutput(bitWidth: Int, bpFirm: Boolean) extends Bundle {
    
    // Rebuild the bundle from its constructor arguments so it can be cloned.
    override def cloneType = (new IPUOutput(bitWidth, bpFirm)).asInstanceOf[this.type]
    
    // NOTE(review): only this field is wrapped in Output(...); the optional
    // fields below are not, and IPU wraps the whole bundle in Output again — confirm intent.
    val innerProd = Output(SInt(bitWidth.W))
    val bpWeight = if (bpFirm) Some(SInt(bitWidth.W)) else None
    val bpActvtn = if (bpFirm) Some(SInt(bitWidth.W)) else None
}


/** Inner product unit: multiplies weight/activation pairs and sums the products.
  *
  * `dataIn` feeds a PMult, whose products feed an AdditiveRT; the tree's sum is
  * exposed as `dataOut.innerProd`. With "Firm" bypass the unit additionally
  * forwards one weight and one activation, chosen by `bpSel` through a
  * priority mux, on `dataOut.bpWeight` / `dataOut.bpActvtn`.
  *
  * @param config pair count, bit width and bypass flavour
  */
class IPU(config: IPUConfig) extends Module {
    
    val bitWd = config.bitWidth
    val width = config.width
    val bpFirm = config.bpFirm
    
    val pmConfig = config.pMultConfig
    val artConfig = config.additiveRTConfig
    
    val io = IO(new Bundle {
        val dataIn = Input(new PMultInput(width, bitWd))
        val dataOut = Output(new IPUOutput(bitWd, bpFirm))
        val bpSel = if (bpFirm) Some(Input(Vec(width, Bool()))) else None
    })
    
    // Multiply stage.
    val pMult = Module(new PMult(pmConfig))
    pMult.io.in <> io.dataIn
    
    // Reduce stage.
    val additiveRT = Module(new AdditiveRT(artConfig))
    additiveRT.io.in := pMult.io.prod
    
    io.dataOut.innerProd := additiveRT.io.sum
    
    // The select and both bypass outputs exist exactly when bpFirm is set, so
    // this body runs only in that configuration.
    for {
        select    <- io.bpSel
        weightOut <- io.dataOut.bpWeight
        actvtnOut <- io.dataOut.bpActvtn
    } {
        weightOut := PriorityMux(select, io.dataIn.weight)
        actvtnOut := PriorityMux(select, io.dataIn.actvtn)
    }
}

defined [32mclass[39m [36mIPUConfig[39m
defined [32mclass[39m [36mIPUOutput[39m
defined [32mclass[39m [36mIPU[39m

#### Verification

In [52]:
// Four-pair, 8-bit inner product unit with "Firm" bypass outputs.
val ipuCon = new IPUConfig(width = 4, bitWidth = 8, bpType = "Firm")

Driver(() => new IPU(ipuCon)) {
    uut => new PeekPokeTester(uut) {
        
        // No bypass lane selected while the inner product is checked.
        poke(uut.io.bpSel.get(0), 0)
        poke(uut.io.bpSel.get(1), 0)
        poke(uut.io.bpSel.get(2), 0)
        poke(uut.io.bpSel.get(3), 0)
        
        poke(uut.io.dataIn.weight(0), 1)
        poke(uut.io.dataIn.weight(1), 2)
        poke(uut.io.dataIn.weight(2), 3)
        poke(uut.io.dataIn.weight(3), 4)
        
        poke(uut.io.dataIn.actvtn(0), 5)
        poke(uut.io.dataIn.actvtn(1), 6)
        poke(uut.io.dataIn.actvtn(2), 7)
        poke(uut.io.dataIn.actvtn(3), 8)
        
        // 1*5 + 2*6 + 3*7 + 4*8 = 70
        expect(uut.io.dataOut.innerProd, 70)
        
        // Select lane 1: the bypass outputs carry weight(1) and actvtn(1).
        poke(uut.io.bpSel.get(0), 0)
        poke(uut.io.bpSel.get(1), 1)
        poke(uut.io.bpSel.get(2), 0)
        poke(uut.io.bpSel.get(3), 0)
        
        expect(uut.io.dataOut.bpWeight.get, 2)
        expect(uut.io.dataOut.bpActvtn.get, 6)
        
        // Select lane 2: the bypass outputs carry weight(2) and actvtn(2).
        poke(uut.io.bpSel.get(0), 0)
        poke(uut.io.bpSel.get(1), 0)
        poke(uut.io.bpSel.get(2), 1)
        poke(uut.io.bpSel.get(3), 0)
        
        expect(uut.io.dataOut.bpWeight.get, 3)
        expect(uut.io.dataOut.bpActvtn.get, 7)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.008] Done elaborating.
Total FIRRTL Compile Time: 22.2 ms
Total FIRRTL Compile Time: 18.5 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532723156836
test cmd50WrapperHelperIPU Success: 5 tests passed in 5 cycles taking 0.003581 seconds
[[35minfo[0m] [0.002] RAN 0 CYCLES PASSED


[36mipuCon[39m: [32mIPUConfig[39m = $sess.cmd50Wrapper$Helper$IPUConfig@3cd0d409
[36mres51_1[39m: [32mBoolean[39m = [32mtrue[39m

## ALU

#### Definition

In [53]:
/** Static parameters of the ALU.
  *
  * @param dataWidth width in bits of every data signal
  * @param funcs     names of the functions to build; must contain "Identity"
  *                  and may contain "Add", "Max" and "Accumulate"
  * @throws IllegalArgumentException if "Identity" is missing or a name is not supported
  */
class ALUConfig(val dataWidth: Int, val funcs: List[String]) {
    val identityError = "ALU functions must explicitly include Identity."
    // Was "Unsupported Error"; the failure is an unsupported *function* name.
    val functionError = "Unsupported Function"
    val supportedFuncs = List("Identity", "Add", "Max", "Accumulate")
    
    require(funcs.contains("Identity"), identityError)
    funcs.foreach { f => require(supportedFuncs.contains(f), functionError) }
    
    val addSupp = funcs.contains("Add")
    val maxSupp = funcs.contains("Max")
    val accSupp = funcs.contains("Accumulate")
    // Add and Max both operate on the bypassed weight/activation inputs.
    val addBypassIn = addSupp || maxSupp
    val numFunc = funcs.length
}

/** Input bundle of the ALU; every field is declared as an input.
  *
  * @param dataWidth   width in bits of every data field
  * @param numFuncs    number of function-select bits
  * @param accSupp     when true the bundle includes `rfFeedback`
  * @param addBypassIn when true the bundle includes `weightBp` and `actvtnBp`
  */
class ALUInput(
        dataWidth: Int,
        numFuncs: Int,
        accSupp: Boolean,
        addBypassIn: Boolean)
    extends Bundle {
    
    // Rebuild the bundle from its constructor arguments so it can be cloned.
    override def cloneType = 
        ((new ALUInput(dataWidth, numFuncs, accSupp, addBypassIn))
        .asInstanceOf[this.type])
    
    val innerProd = Input(SInt(dataWidth.W))
    val funcSel = Input(Vec(numFuncs, Bool()))
    
    // Optional operands; each exists only when its consuming function is configured.
    val weightBp = if(addBypassIn) Some(Input(SInt(dataWidth.W))) else None
    val actvtnBp = if(addBypassIn) Some(Input(SInt(dataWidth.W))) else None
    val rfFeedback = if(accSupp) Some(Input(SInt(dataWidth.W))) else None
}

/** Configurable ALU that computes every enabled function and selects one result.
  *
  * Functions (each built only if configured, except identity which always exists):
  *  - identity:   innerProd
  *  - add:        weightBp + actvtnBp
  *  - max:        the larger of weightBp and actvtnBp
  *  - accumulate: innerProd + rfFeedback
  *
  * The built results are collected in that fixed order and `funcSel` picks
  * among them through a priority mux.
  *
  * @param config data width and the set of supported functions
  */
class ALU(config: ALUConfig) extends Module {
    
    val addSupp = config.addSupp
    val maxSupp = config.maxSupp
    val accSupp = config.accSupp
    val addBpIn = config.addBypassIn
    val numFunc = config.numFunc
    val dw = config.dataWidth
 
    val io = IO(new Bundle {
        val in = new ALUInput(dw, numFunc, accSupp, addBpIn)
        val out = Output(SInt(dw.W))
    })
    
    // One result wire per function; unsupported functions have no wire at all.
    val idnOut = Some(Wire(SInt(dw.W)))
    val addOut = if(addSupp) Some(Wire(SInt(dw.W))) else None
    val maxOut = if(maxSupp) Some(Wire(SInt(dw.W))) else None
    val accOut = if(accSupp) Some(Wire(SInt(dw.W))) else None
    
    idnOut.get := io.in.innerProd
    
    // Each wire exists exactly when its function is supported, so iterating
    // over the Option drives only the functions that were built.
    addOut.foreach { sum => sum := io.in.weightBp.get + io.in.actvtnBp.get }
    accOut.foreach { acc => acc := io.in.innerProd + io.in.rfFeedback.get }
    maxOut.foreach { larger =>
        larger := Mux(
            io.in.weightBp.get > io.in.actvtnBp.get,
            io.in.weightBp.get,
            io.in.actvtnBp.get)
    }
    
    // Keep only the results that exist, in identity/add/max/accumulate order.
    val inters = List(idnOut, addOut, maxOut, accOut).flatten
    io.out := PriorityMux(io.in.funcSel, inters)
}

defined [32mclass[39m [36mALUConfig[39m
defined [32mclass[39m [36mALUInput[39m
defined [32mclass[39m [36mALU[39m

#### Verification

In [54]:
// ALU with all four functions enabled on 8-bit data.
val aluFuncs = "Identity" :: "Add" :: "Max" :: "Accumulate" :: Nil
val aluCon = new ALUConfig(dataWidth = 8, funcs = aluFuncs)

Driver(() => new ALU(aluCon)) {
    uut => new PeekPokeTester(uut) {
        
        // Fixed operands for all four checks.
        poke(uut.io.in.innerProd, 1)
        poke(uut.io.in.weightBp.get, 2)
        poke(uut.io.in.actvtnBp.get, 3)
        poke(uut.io.in.rfFeedback.get, 4)
        
        // Select 0 -> identity: innerProd = 1
        poke(uut.io.in.funcSel(0), 1)
        poke(uut.io.in.funcSel(1), 0)
        poke(uut.io.in.funcSel(2), 0)
        poke(uut.io.in.funcSel(3), 0)
        expect(uut.io.out, 1)
        
        // Select 1 -> add: weightBp + actvtnBp = 2 + 3 = 5
        poke(uut.io.in.funcSel(0), 0)
        poke(uut.io.in.funcSel(1), 1)
        poke(uut.io.in.funcSel(2), 0)
        poke(uut.io.in.funcSel(3), 0)
        expect(uut.io.out, 5)
        
        // Select 2 -> max: larger of 2 and 3 = 3
        poke(uut.io.in.funcSel(0), 0)
        poke(uut.io.in.funcSel(1), 0)
        poke(uut.io.in.funcSel(2), 1)
        poke(uut.io.in.funcSel(3), 0)
        expect(uut.io.out, 3)
        
        // Select 3 -> accumulate: innerProd + rfFeedback = 1 + 4 = 5
        poke(uut.io.in.funcSel(0), 0)
        poke(uut.io.in.funcSel(1), 0)
        poke(uut.io.in.funcSel(2), 0)
        poke(uut.io.in.funcSel(3), 1)
        expect(uut.io.out, 5)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.003] Done elaborating.
Total FIRRTL Compile Time: 6.2 ms
Total FIRRTL Compile Time: 6.6 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532723160528
test cmd52WrapperHelperALU Success: 4 tests passed in 5 cycles taking 0.003265 seconds
[[35minfo[0m] [0.003] RAN 0 CYCLES PASSED


[36maluFuncs[39m: [32mList[39m[[32mString[39m] = [33mList[39m([32m"Identity"[39m, [32m"Add"[39m, [32m"Max"[39m, [32m"Accumulate"[39m)
[36maluCon[39m: [32mALUConfig[39m = $sess.cmd52Wrapper$Helper$ALUConfig@466060bb
[36mres53_2[39m: [32mBoolean[39m = [32mtrue[39m

## Nonlinear Unit

In [55]:
/** Static parameters of the nonlinear unit.
  *
  * @param dataWidth width in bits of the data signal
  * @param funcs     names of the functions to build; must contain "Identity"
  *                  and may contain "ReLu"
  * @throws IllegalArgumentException if "Identity" is missing or a name is not supported
  */
class NLUConfig(val dataWidth: Int, val funcs: List[String]) {
    val supportedFuncs = List("Identity", "ReLu")
    val identityError = "NLU functions must explicitly include Identity."
    val functionError = "Unsupported Function"
    
    // A missing Identity is reported before any unsupported name.
    require(funcs.contains("Identity"), identityError)
    funcs.foreach { name => require(supportedFuncs.contains(name), functionError) }
    
    val reluSupp = funcs.contains("ReLu")
    val numFuncs = funcs.size
}

/** Input bundle of the nonlinear unit.
  *
  * @param dataWidth width in bits of `data`
  * @param numFuncs  number of function-select bits
  * @param reluSupp  not used to shape the bundle; only forwarded to `cloneType`
  */
class NLUInputs(dataWidth: Int, numFuncs: Int, reluSupp: Boolean) extends Bundle {
    
    // Rebuild the bundle from its constructor arguments so it can be cloned.
    override def cloneType = 
        (new NLUInputs(dataWidth, numFuncs, reluSupp)).asInstanceOf[this.type]
    
    val data = SInt(dataWidth.W)     // signed value to transform
    val fSel = Vec(numFuncs, Bool()) // function-select bits
}

/** Nonlinear unit: applies identity or (if configured) ReLU to a signed input.
  *
  * The identity result always exists; the ReLU result passes positive inputs
  * and replaces everything else with zero. `fSel` picks among the built
  * results, identity first, through a priority mux.
  *
  * @param config data width and the set of supported functions
  */
class NLU(config: NLUConfig) extends Module {
    
    val dataWidth = config.dataWidth
    val reluSupp = config.reluSupp
    val numFuncs = config.numFuncs
    
    val io = IO(new Bundle {
        val in  = Input(new NLUInputs(dataWidth, numFuncs, reluSupp))
        val out = Output(SInt(dataWidth.W))
    })
    
    val idRes   = Some(Wire(SInt(dataWidth.W)))
    val reluRes = if(reluSupp) Some(Wire(SInt(dataWidth.W))) else None
    
    idRes.get := io.in.data
    
    // The ReLU wire exists exactly when ReLU is supported.
    reluRes.foreach { rectified =>
        // Default to zero, then let a strictly positive input override it.
        rectified := 0.S
        when (io.in.data > 0.S) {
            rectified := io.in.data
        }
    }
    
    // Keep only the results that exist, identity first.
    val inters = List(idRes, reluRes).flatten
    io.out := PriorityMux(io.in.fSel, inters)
}

defined [32mclass[39m [36mNLUConfig[39m
defined [32mclass[39m [36mNLUInputs[39m
defined [32mclass[39m [36mNLU[39m

In [56]:
// Nonlinear unit with both functions enabled on 8-bit data.
val nluFuncs = "Identity" :: "ReLu" :: Nil
val nluCon = new NLUConfig(dataWidth = 8, funcs = nluFuncs)

Driver(() => new NLU(nluCon)) {
    uut => new PeekPokeTester(uut) {
        
        // Positive input: identity and ReLU both return it unchanged.
        poke(uut.io.in.data, 5)
        
        poke(uut.io.in.fSel(0), 1)
        poke(uut.io.in.fSel(1), 0)
        expect(uut.io.out, 5)
        
        poke(uut.io.in.fSel(0), 0)
        poke(uut.io.in.fSel(1), 1)
        expect(uut.io.out, 5)
        
        // Negative input: identity returns it, ReLU returns zero.
        poke(uut.io.in.data, -4)
        
        poke(uut.io.in.fSel(0), 1)
        poke(uut.io.in.fSel(1), 0)
        expect(uut.io.out, -4)
        
        poke(uut.io.in.fSel(0), 0)
        poke(uut.io.in.fSel(1), 1)
        expect(uut.io.out, 0)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.004] Done elaborating.
Total FIRRTL Compile Time: 3.8 ms
Total FIRRTL Compile Time: 2.6 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532723163395
test cmd54WrapperHelperNLU Success: 4 tests passed in 5 cycles taking 0.001686 seconds
[[35minfo[0m] [0.001] RAN 0 CYCLES PASSED


[36mnluFuncs[39m: [32mList[39m[[32mString[39m] = [33mList[39m([32m"Identity"[39m, [32m"ReLu"[39m)
[36mnluCon[39m: [32mNLUConfig[39m = $sess.cmd54Wrapper$Helper$NLUConfig@52af33b3
[36mres55_2[39m: [32mBoolean[39m = [32mtrue[39m

## Control

### State Machine

#### Definition

In [340]:
/** Generic state machine whose transition logic is supplied by the caller.
  *
  * The current state is held in a register that resets to 0 and is exposed on
  * `io.out`. On every clock edge the register takes the value returned by
  * `nextState`.
  *
  * @param numStates number of states; the state register is log2Up(numStates) bits wide
  * @param nextState builds the next-state logic from (current state, control
  *                  input, state width in bits) and returns the next state
  * @param ctrlWidth width in bits of the control input
  */
class StateMachine(numStates: Int, nextState: (UInt, UInt, Int) => UInt, ctrlWidth: Int) extends Module {
    
    val stateWidth: Int = log2Up(numStates)
    
    val io = IO(new Bundle {
        val control = Input (UInt(ctrlWidth.W ))
        val out     = Output(UInt(stateWidth.W))
    })
    
    val register = RegInit(0.U(stateWidth.W))
    // The third argument is the STATE width: the transition function uses it
    // to size its next-state wire. Passing ctrlWidth here made that wire the
    // wrong width whenever the two widths differed.
    register := nextState(register, io.control, stateWidth)
    io.out := register
}

defined [32mclass[39m [36mStateMachine[39m

#### Example

In [341]:
/** Example transition function for a two-state machine.
  *
  * From state 0 or 1 the next state equals the control value when that value
  * is 0 or 1; every other (state, control) combination goes to state 0.
  *
  * @param state      current state
  * @param control    control input
  * @param stateWidth width in bits of the returned next-state wire
  * @return a wire carrying the next state
  */
def stateMap(state: UInt, control: UInt, stateWidth: Int): UInt = {
    
    // Transition table, checked top to bottom; the first matching row wins.
    val transitions = Seq(
        ((state === 0.U) && (control === 0.U)) -> 0.U,
        ((state === 0.U) && (control === 1.U)) -> 1.U,
        ((state === 1.U) && (control === 0.U)) -> 0.U,
        ((state === 1.U) && (control === 1.U)) -> 1.U
    )
    
    val nextState = Wire(UInt(stateWidth.W))
    // Anything not listed in the table falls back to state 0.
    nextState := MuxCase(0.U, transitions)
    
    nextState
}

defined [32mfunction[39m [36mstateMap[39m

#### Verification

In [343]:
// Two-state machine driven by stateMap, with a 4-bit control input.
Driver(() => new StateMachine(2, stateMap, 4)) {
    uut => new PeekPokeTester(uut) {
        // The state register resets to 0, so the output is 0 before any step.
        poke(uut.io.control, 0)
        expect(uut.io.out, 0)
        
        // 0 -> 1
        poke(uut.io.control, 1)
        step(1)
        expect(uut.io.out, 1)
        
        // 1 -> 1
        poke(uut.io.control, 1)
        step(1)
        expect(uut.io.out, 1)
        
        // 1 -> 0
        poke(uut.io.control, 0)
        step(1)
        expect(uut.io.out, 0)
        
        // 0 -> 0
        poke(uut.io.control, 0)
        step(1)
        expect(uut.io.out, 0)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.002] Done elaborating.
Total FIRRTL Compile Time: 10.1 ms
Total FIRRTL Compile Time: 12.5 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1532561171036
test cmd339WrapperHelperStateMachine Success: 5 tests passed in 9 cycles taking 0.004398 seconds
[[35minfo[0m] [0.004] RAN 4 CYCLES PASSED


[36mres342[39m: [32mBoolean[39m = [32mtrue[39m

### Decoder

#### Definition

In [57]:
/** Static parameters of the decoder, plus the user-supplied decode functions.
  *
  * @param numStates    number of controller states; fixes the state input width
  * @param prfConfig    parallel register file configuration (ports, address width, bypass)
  * @param ipuConfig    inner product unit configuration
  * @param aluConfig    ALU configuration; determines the ALU select width and IPU bypass
  * @param nluConfig    NLU configuration; determines the NLU select width
  * @param decodePRF    builds PRF control from (ports, address width, bypass flag, state, bus name)
  * @param decodeIPU    builds the IPU bypass select from (ports, state)
  * @param decodeALU    builds the ALU function select from (number of ALU functions, state)
  * @param decodeNLU    builds the NLU function select from (number of NLU functions, state)
  * @param decodeIntSel builds the internal-PRF select from the state
  */
class DecoderConfig(
        val numStates: Int,
        val prfConfig: PRFConfig,
        val ipuConfig: IPUConfig,
        val aluConfig: ALUConfig,
        val nluConfig: NLUConfig,
        val decodePRF: (Int, Int, Boolean, UInt, String) => Data,
        val decodeIPU: (Int, UInt) => Data,
        val decodeALU: (Int, UInt) => Data,
        val decodeNLU: (Int, UInt) => Data,
        val decodeIntSel: UInt => Data) {
    
    val bpPRF = prfConfig.bpSoft
    
    // Take the supported functions from the configurations passed to THIS
    // instance. The previous code read the notebook-level `aluFuncs` and
    // `nluFuncs` values, so it ignored `aluConfig` / `nluConfig` entirely.
    val addSupp = aluConfig.addSupp
    val maxSupp = aluConfig.maxSupp
    val accSupp = aluConfig.accSupp
    val reluSupp = nluConfig.reluSupp
    
    val bpIPU = addSupp || maxSupp
    // Identity is always present; every optional function adds one select bit.
    val numALUFuncs = 1 + Seq(addSupp, maxSupp, accSupp).count(supported => supported)
    val numNLUFuncs = if (reluSupp) 2 else 1
    
    val stateWidth = log2Up(numStates)
}

/** Decoder that turns the controller state into control signals for the datapath.
  *
  * All decoding is delegated to the functions carried by the configuration:
  * this module only declares the ports and connects each one to the value
  * returned by the matching decode function for the current `io.state`.
  *
  * @param c widths, feature flags and decode functions
  */
class Decoder(c: DecoderConfig) extends Module {
    
    val ports = c.prfConfig.ports
    val addrWidth = c.prfConfig.addrWidth
    
    val io = IO(new Bundle {
        val state = Input(UInt(c.stateWidth.W))
        val weightPRF = Output(new PRFControl(ports, addrWidth, c.bpPRF))
        val actvtnPRF = Output(new PRFControl(ports, addrWidth, c.bpPRF))
        val intrnlPRF = Output(new PRFControl(ports, addrWidth, c.bpPRF))
        val intrnlPRFSel = Output(Bool())
        val aluFSel = Output(Vec(c.numALUFuncs, Bool()))
        val nluFSel = Output(Vec(c.numNLUFuncs, Bool()))
        val ipuBpSel = if (c.bpIPU) Some(Output(Vec(ports, Bool()))) else None
    })
    
    // PRF control for one named bus, decoded from the current state.
    private def prfControlFor(bus: String): Data =
        c.decodePRF(ports, addrWidth, c.bpPRF, io.state, bus)
    
    io.weightPRF <> prfControlFor("weightPRF")
    io.actvtnPRF <> prfControlFor("actvtnPRF")
    io.intrnlPRF <> prfControlFor("intrnlPRF")
    io.intrnlPRFSel := c.decodeIntSel(io.state)
    
    // The bypass-select port exists exactly when c.bpIPU is set.
    io.ipuBpSel.foreach { select => select := c.decodeIPU(ports, io.state) }
    io.aluFSel := c.decodeALU(c.numALUFuncs, io.state)
    io.nluFSel := c.decodeNLU(c.numNLUFuncs, io.state)
}

defined [32mclass[39m [36mDecoderConfig[39m
defined [32mclass[39m [36mDecoder[39m

#### Example

In [53]:
/**
  * Example PRF decode function.
  *
  * In state 0 every register file of the selected bus gets its enables (and
  * bypass select, when supported) asserted with addresses 1/2/3; in any other
  * state the enables are de-asserted and the addresses are 4/5/6.
  *
  * @param ports     number of register files in the PRF control bundle
  * @param addrWidth register-file address width
  * @param bpPRF     whether the bypass-select signal exists
  * @param state     current state
  * @param bus       one of "weightPRF", "actvtnPRF", "intrnlPRF"; any other
  *                  name raises a MatchError during elaboration
  * @return the wire carrying the decoded PRF control bundle
  */
def decodePRF(ports: Int, addrWidth: Int, bpPRF: Boolean, state: UInt, bus: String) = {

    val data = Wire(new PRFControl(ports, addrWidth, bpPRF))

    // Applies one set of control values to every register file in the bundle.
    def driveAll(enable: Boolean, wAddr: Int, rAddrInt: Int, rAddrExt: Int): Unit =
        for (rf <- data.rf) {
            rf.wEnable  := enable.B
            rf.rEnable  := enable.B
            rf.wAddr    := wAddr.U
            rf.rAddrInt := rAddrInt.U
            rf.rAddrExt := rAddrExt.U
            if (bpPRF) { rf.bpSel.get := enable.B }
        }

    // The buses currently share the same values but stay as separate cases so
    // each one can be given its own settings.
    when (state === 0.U) {
        bus match {
            case "weightPRF" => driveAll(enable = true, wAddr = 1, rAddrInt = 2, rAddrExt = 3)
            case "actvtnPRF" => driveAll(enable = true, wAddr = 1, rAddrInt = 2, rAddrExt = 3)
            case "intrnlPRF" => driveAll(enable = true, wAddr = 1, rAddrInt = 2, rAddrExt = 3)
        }
    } .otherwise {
        bus match {
            case "weightPRF" => driveAll(enable = false, wAddr = 4, rAddrInt = 5, rAddrExt = 6)
            case "actvtnPRF" => driveAll(enable = false, wAddr = 4, rAddrInt = 5, rAddrExt = 6)
            case "intrnlPRF" => driveAll(enable = false, wAddr = 4, rAddrInt = 5, rAddrExt = 6)
        }
    }

    data
}

/**
  * Example decode function for the internal-PRF select signal.
  *
  * @param state current state
  * @return a Bool wire that is true in state 0 and false in every other state
  */
def decodeIntSel(state: UInt) = {
    val data = Wire(Bool())

    // High exactly when the state equals 0.
    data := state === 0.U

    data
}

/**
  * Example decode function for the IPU bypass select.
  *
  * Produces a one-hot vector of `ports` bits: bit 0 is set in state 0 and
  * bit 1 is set in every other state. The pattern is built from `ports`
  * rather than from a fixed-length literal, so the function elaborates for
  * any port count; when the hot index does not exist (e.g. `ports == 1`
  * outside state 0) all bits are low.
  *
  * @param ports number of select bits to produce
  * @param state current state
  * @return the wire carrying the select vector
  */
def decodeIPU(ports: Int, state: UInt) = {
    val data = Wire(Vec(ports, Bool()))

    // Drives each bit individually so the width always follows `ports`.
    def driveOneHot(hot: Int): Unit =
        data.zipWithIndex.foreach { case (bit, i) => bit := (i == hot).B }

    when (state === 0.U) {
        driveOneHot(0)
    } .otherwise {
        driveOneHot(1)
    }

    data
}

/**
  * Example decode function for the ALU function select.
  *
  * Produces a one-hot vector of `numALUFuncs` bits: bit 0 is set in state 0
  * and bit 1 is set in every other state. The pattern is sized by
  * `numALUFuncs` instead of a fixed four-element literal, so it also
  * elaborates when fewer ALU functions are configured; when the hot index
  * does not exist all bits are low.
  *
  * @param numALUFuncs number of select bits to produce
  * @param state       current state
  * @return the wire carrying the select vector
  */
def decodeALU(numALUFuncs: Int, state: UInt) = {
    val data = Wire(Vec(numALUFuncs, Bool()))

    // Drives each bit individually so the width always follows `numALUFuncs`.
    def driveOneHot(hot: Int): Unit =
        data.zipWithIndex.foreach { case (bit, i) => bit := (i == hot).B }

    when (state === 0.U) {
        driveOneHot(0)
    } .otherwise {
        driveOneHot(1)
    }

    data
}

/**
  * Example decode function for the NLU function select.
  *
  * Produces a one-hot vector of `numNLUFuncs` bits: bit 0 is set in state 0
  * and bit 1 is set in every other state. The pattern is sized by
  * `numNLUFuncs` instead of a fixed two-element literal, so it also
  * elaborates when only one NLU function is configured; in that case no bit
  * is set outside state 0.
  *
  * @param numNLUFuncs number of select bits to produce
  * @param state       current state
  * @return the wire carrying the select vector
  */
def decodeNLU(numNLUFuncs: Int, state: UInt) = {
    val data = Wire(Vec(numNLUFuncs, Bool()))

    // Drives each bit individually so the width always follows `numNLUFuncs`.
    def driveOneHot(hot: Int): Unit =
        data.zipWithIndex.foreach { case (bit, i) => bit := (i == hot).B }

    when (state === 0.U) {
        driveOneHot(0)
    } .otherwise {
        driveOneHot(1)
    }

    data
}


defined [32mfunction[39m [36mdecodePRF[39m
defined [32mfunction[39m [36mdecodeIntSel[39m
defined [32mfunction[39m [36mdecodeIPU[39m
defined [32mfunction[39m [36mdecodeALU[39m
defined [32mfunction[39m [36mdecodeNLU[39m

#### Verification

In [58]:
// Elaborates a Decoder wired to the example decode functions and checks its
// outputs first for state 0 and then for state 1.
// NOTE(review): `Decoder` as defined in this notebook takes a single
// `DecoderConfig` argument; the named arguments below (ports, addrWidth,
// stateWidth, ...) do not match that constructor and no `decodeIntSel` is
// passed. Presumably this cell was written against an earlier Decoder
// revision -- confirm and update before re-running.
Driver(() => new Decoder(
  ports = 2,
  addrWidth = 4,
  stateWidth = 4,
  bpPRF = true,
  addSupp = true,
  maxSupp = true,
  accSupp = true,
  reluSupp = true,
  decodePRF = decodePRF,
  decodeIPU = decodeIPU,
  decodeALU = decodeALU,
  decodeNLU = decodeNLU)) {
    
    uut => new PeekPokeTester(uut) {
        
        
        // ---- State 0: enables high, addresses 1/2/3, first select bit hot ----
        poke(uut.io.state, 0.U)
        step(1)
        
        // Only rf(0) of each PRF is inspected; the other ports are not checked.
        expect(uut.io.weightPRF.rf(0).wEnable, true.B)
        expect(uut.io.weightPRF.rf(0).rEnable, true.B)
        expect(uut.io.weightPRF.rf(0).wAddr, 1.U)
        expect(uut.io.weightPRF.rf(0).rAddrInt, 2.U)
        expect(uut.io.weightPRF.rf(0).rAddrExt, 3.U)
        expect(uut.io.weightPRF.rf(0).bpSel.get, true.B)
        
        expect(uut.io.actvtnPRF.rf(0).wEnable, true.B)
        expect(uut.io.actvtnPRF.rf(0).rEnable, true.B)
        expect(uut.io.actvtnPRF.rf(0).wAddr, 1.U)
        expect(uut.io.actvtnPRF.rf(0).rAddrInt, 2.U)
        expect(uut.io.actvtnPRF.rf(0).rAddrExt, 3.U)
        expect(uut.io.actvtnPRF.rf(0).bpSel.get, true.B)
        
        // IPU bypass select: bit 0 set
        expect(uut.io.ipuBpSel.get(0), 1)
        expect(uut.io.ipuBpSel.get(1), 0)
        
        // ALU function select: bit 0 set
        expect(uut.io.aluFSel(0), 1)
        expect(uut.io.aluFSel(1), 0)
        expect(uut.io.aluFSel(2), 0)
        expect(uut.io.aluFSel(3), 0)
        
        expect(uut.io.intrnlPRF.rf(0).wEnable, true.B)
        expect(uut.io.intrnlPRF.rf(0).rEnable, true.B)
        expect(uut.io.intrnlPRF.rf(0).wAddr, 1.U)
        expect(uut.io.intrnlPRF.rf(0).rAddrInt, 2.U)
        expect(uut.io.intrnlPRF.rf(0).rAddrExt, 3.U)
        expect(uut.io.intrnlPRF.rf(0).bpSel.get, true.B)
        
        // NLU function select: bit 0 set
        expect(uut.io.nluFSel(0), 1)
        expect(uut.io.nluFSel(1), 0)
        
        // ---- State 1: enables low, addresses 4/5/6, second select bit hot ----
        poke(uut.io.state, 1.U) 
        step(1)
        
        expect(uut.io.weightPRF.rf(0).wEnable, false.B)
        expect(uut.io.weightPRF.rf(0).rEnable, false.B)
        expect(uut.io.weightPRF.rf(0).wAddr, 4.U)
        expect(uut.io.weightPRF.rf(0).rAddrInt, 5.U)
        expect(uut.io.weightPRF.rf(0).rAddrExt, 6.U)
        expect(uut.io.weightPRF.rf(0).bpSel.get, false.B)
        
        expect(uut.io.actvtnPRF.rf(0).wEnable, false.B)
        expect(uut.io.actvtnPRF.rf(0).rEnable, false.B)
        expect(uut.io.actvtnPRF.rf(0).wAddr, 4.U)
        expect(uut.io.actvtnPRF.rf(0).rAddrInt, 5.U)
        expect(uut.io.actvtnPRF.rf(0).rAddrExt, 6.U)
        expect(uut.io.actvtnPRF.rf(0).bpSel.get, false.B)
        
        // IPU bypass select: bit 1 set
        expect(uut.io.ipuBpSel.get(0), 0)
        expect(uut.io.ipuBpSel.get(1), 1)
        
        // ALU function select: bit 1 set
        expect(uut.io.aluFSel(0), 0)
        expect(uut.io.aluFSel(1), 1)
        expect(uut.io.aluFSel(2), 0)
        expect(uut.io.aluFSel(3), 0)
        
        expect(uut.io.intrnlPRF.rf(0).wEnable, false.B)
        expect(uut.io.intrnlPRF.rf(0).rEnable, false.B)
        expect(uut.io.intrnlPRF.rf(0).wAddr, 4.U)
        expect(uut.io.intrnlPRF.rf(0).rAddrInt, 5.U)
        expect(uut.io.intrnlPRF.rf(0).rAddrExt, 6.U)
        expect(uut.io.intrnlPRF.rf(0).bpSel.get, false.B)
        
        // NLU function select: bit 1 set
        expect(uut.io.nluFSel(0), 0)
        expect(uut.io.nluFSel(1), 1)
        
        // NOTE(review): io.intrnlPRFSel is never checked in this cell.
    }
}


[info] [0.000] Elaborating design...
[info] [0.009] Done elaborating.
Total FIRRTL Compile Time: 41.1 ms
Total FIRRTL Compile Time: 22.7 ms
End of dependency graph
Circuit state created
[info] [0.000] SEED 1532655865910
test cmd54WrapperHelperDecoder Success: 52 tests passed in 7 cycles taking 0.014877 seconds
[info] [0.011] RAN 2 CYCLES PASSED


res57: Boolean = true

## PE

#### Definition

In [None]:
/**
  * Groups the configuration objects of every PE sub-block into one value.
  *
  * @param stateMachineConfig configuration of the state machine
  * @param decoderConfig      configuration of the decoder
  * @param prfConfig          configuration of the parallel register files
  * @param ipuConfig          configuration of the IPU
  * @param aluConfig          configuration of the ALU
  * @param nluConfig          configuration of the NLU
  */
class PEConfig(
    val stateMachineConfig: StateMachineConfig,
    val decoderConfig:      DecoderConfig,
    val prfConfig:          PRFConfig,
    val ipuConfig:          IPUConfig,
    val aluConfig:          ALUConfig,
    val nluConfig:          NLUConfig
)

/**
  * Top-level processing engine. Wires together a state machine, a decoder,
  * the weight / activation / internal register files, the IPU, the ALU and
  * the nonlinear unit.
  *
  * Data path as wired below: weight and activation RFs feed the IPU, the IPU
  * feeds the ALU, the ALU result (or `intrnlRF_in`) is written to the internal
  * RF, and the internal RF's internal read port feeds the nonlinear unit,
  * whose result is `io.output`.
  *
  * NOTE(review): this cell has never been executed and still uses flat signal
  * names (`write_en`, `weightRF_wen`, ...) and constructor signatures for
  * `Decoder`, `pRF` and `NonlinearUnit` that differ from the config-object
  * style used elsewhere in this notebook. Those names are left untouched and
  * must be migrated before the class can elaborate.
  *
  * @param stateMap          state-transition table handed to the state machine
  * @param extrnl_ctrl_width width of the external control input
  * @param decode            decode function handed to the decoder
  * @param RFports           number of ports of the weight and activation RFs
  * @param weightRFBP        bypass mode of the weight RF
  * @param actvtnRFBP        bypass mode of the activation RF
  * @param datawidth         data width of all data ports
  * @param addrwidth         address width of the register files
  * @param aluFuncs          names of the supported ALU functions
  * @param nluFuncs          names of the supported NLU functions
  * @param intrnlRFBP        bypass mode of the internal RF
  */
class nPE(stateMap: Map[(UInt, UInt), UInt], extrnl_ctrl_width: Int, // State Machine
          decode: (UInt, String) => Data, RFports: Int, weightRFBP: String, actvtnRFBP: String, datawidth: Int, addrwidth: Int,
          aluFuncs: List[String], nluFuncs: List[String], intrnlRFBP: String
         ) extends Module {
    
    val io = IO(new Bundle {
        val extrnl_ctrl   = Input (SInt(extrnl_ctrl_width.W))
        val weightRF_in   = Input (Vec(RFports, SInt(datawidth.W)))
        val actvtnRF_in   = Input (Vec(RFports, SInt(datawidth.W)))
        val intrnlRF_in   = Input (SInt(datawidth.W))
        val weightRF_2NoC = Output(Vec(RFports, SInt(datawidth.W)))
        val actvtnRF_2NoC = Output(Vec(RFports, SInt(datawidth.W)))
        val intrnlRF_2NoC = Output(SInt(datawidth.W))
        val output        = Output(SInt(datawidth.W))
    })
    
    // Chisel requires every submodule to be created through Module(...).
    val stateMachine = Module(new StateMachine(stateMap, extrnl_ctrl_width))
    stateMachine.io.control := io.extrnl_ctrl
    
    val decoder = Module(new Decoder(decode, log2Up(stateMap.size), 
                                     RFports, datawidth, addrwidth, aluFuncs, nluFuncs))
    decoder.io.state := stateMachine.io.state
    
    
    // ---- Weight RF ----
    val weightRF = Module(new pRF(RFports, weightRFBP, datawidth, addrwidth))
    
    // TODO(review): the original had bare references to weightRF.io.in.{wEnable,
    // rEnable, wAddr, rAddrInt, rAddrExt} here -- apparently the start of a
    // migration to a bundled control port. They had no effect and were removed.
    
    // Mandatory control
    weightRF.io.write_en    := decoder.io.weightRF_wen
    weightRF.io.read_en     := decoder.io.weightRF_ren
    weightRF.io.waddr       := decoder.io.weightRF_waddr
    weightRF.io.raddr_int   := decoder.io.weightRF_raddr_int
    weightRF.io.raddr_ext   := decoder.io.weightRF_raddr_ext
    
    // Optional control (only when the RF exposes a bypass select)
    weightRF.io.bp_slct.foreach { sel => sel := decoder.io.weightRF_bp_slct_get }
    
    // Write data
    weightRF.io.wdata := io.weightRF_in
    
    // External read port
    io.weightRF_2NoC  := weightRF.io.rdata_ext
    
    // ---- Activation RF ----
    val actvtnRF = Module(new pRF(RFports, actvtnRFBP, datawidth, addrwidth))
    
    // Mandatory control
    actvtnRF.io.write_en    := decoder.io.actvtnRF_wen
    actvtnRF.io.read_en     := decoder.io.actvtnRF_ren
    actvtnRF.io.waddr       := decoder.io.actvtnRF_waddr
    actvtnRF.io.raddr_int   := decoder.io.actvtnRF_raddr_int
    actvtnRF.io.raddr_ext   := decoder.io.actvtnRF_raddr_ext
    
    // Optional control (only when the RF exposes a bypass select)
    actvtnRF.io.bp_slct.foreach { sel => sel := decoder.io.actvtnRF_bp_slct_get }
    
    // Write data: the activation RF is fed from the activation input
    // (the original wired io.weightRF_in here, leaving io.actvtnRF_in unused).
    actvtnRF.io.wdata := io.actvtnRF_in
    
    // External read port
    io.actvtnRF_2NoC     := actvtnRF.io.rdata_ext
    
    // ---- IPU ----
    // Bypass is only needed when the ALU supports "Add" or "Max".
    val ipuBP = if(aluFuncs.contains("Add") || aluFuncs.contains("Max")) "Firm" else "None" 
    val ipu   = Module(new IPU(RFports, ipuBP, datawidth))
    ipu.io.sel.foreach { sel => sel := decoder.io.ipu_sel_get }
    ipu.io.in1 := weightRF.io.rdata_int
    ipu.io.in2 := actvtnRF.io.rdata_int
    
    // ---- ALU ----
    val alu = Module(new ALU(aluFuncs, datawidth))
    alu.io.func_slct := decoder.io.alu_func_slct
    alu.io.innr_prod := ipu.io.out
    alu.io.weight_bp.foreach { bp => bp := ipu.io.bp1.get }
    alu.io.actvtn_bp.foreach { bp => bp := ipu.io.bp2.get }
    
    // ---- Internal RF ----
    val intrnlRF = Module(new pRF(1, intrnlRFBP, datawidth, addrwidth))
    intrnlRF.io.write_en  := decoder.io.intrnlRF_write_en
    intrnlRF.io.read_en   := decoder.io.intrnlRF_read_en
    intrnlRF.io.waddr     := decoder.io.intrnlRF_waddr
    intrnlRF.io.raddr_int := decoder.io.intrnlRF_raddr_int
    intrnlRF.io.raddr_ext := decoder.io.intrnlRF_raddr_ext
    intrnlRF.io.bp_slct.foreach { sel => sel := decoder.io.intrnlRF_bp_slct_get }
    // Write either the ALU result or the external input, as chosen by the decoder.
    intrnlRF.io.wdata := Mux(decoder.io.intrnlRF_wdata_slct, alu.io.output, io.intrnlRF_in)
    io.intrnlRF_2NoC := intrnlRF.io.rdata_ext
    // Feedback path into the ALU, when the ALU exposes one.
    alu.io.rf_feedbk.foreach { fb => fb := intrnlRF.io.rdata_int }
    
    // ---- Nonlinear unit ----
    val nlu = Module(new NonlinearUnit(nluFuncs, datawidth))
    nlu.io.fslct := decoder.io.nlu_func_slct
    nlu.io.input     := intrnlRF.io.rdata_int
    io.output        := nlu.io.outpt
}

#### Verification

## Future Plans
* Verify everything using Golden Models