# Unified Processing Engine
#### Verification | Version 0.6.2 | Updated 2018.7.31
___

## Setup

In [1]:
// Build the location of the Ivy-loading script relative to the current working
// directory, then load that script into the running Ammonite interpreter session.
// NOTE(review): presumably load-ivy.sc resolves the Chisel dependencies imported
// below — confirm against that script.
val path = System.getProperty("user.dir") + "/source/load-ivy.sc"
interp.load.module(ammonite.ops.Path(java.nio.file.FileSystems.getDefault().getPath(path)))

[36mpath[39m: [32mString[39m = [32m"""
C:\Users\RyanL\OneDrive\Research\SEAL\processing-engine/source/load-ivy.sc
"""[39m

In [2]:
import chisel3._
import chisel3.util._
import chisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

import scala.math.pow

[32mimport [39m[36mchisel3._
[39m
[32mimport [39m[36mchisel3.util._
[39m
[32mimport [39m[36mchisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

[39m
[32mimport [39m[36mscala.math.pow[39m

## Register File

##### Definition

In [3]:
/**
 * Static parameters for one side (internal or external) of a register file.
 *
 * @param numInputs      number of write ports on this side
 * @param numOutputs     number of read ports on this side
 * @param numCrossInputs width of each read port's bypass-select vector
 * @param addrWidth      bit width of the read/write address signals
 * @param bpSoft         true when "Soft" bypassing is configured
 * @param bpFirm         true when "Firm" bypassing is configured
 */
class PartialRFConfig(
    val numInputs: Int,
    val numOutputs: Int,
    val numCrossInputs: Int,
    val addrWidth: Int,
    val bpSoft: Boolean,
    val bpFirm: Boolean
)

/**
 * Control signals for one side (internal or external) of a register file.
 *
 * Address vectors exist only when the register file has backing storage
 * (i.e. not "Firm" bypass); the bypass-select matrix exists only when some
 * form of bypassing ("Soft" or "Firm") is configured.
 */
class PartialRFControl(c: PartialRFConfig) extends Bundle {

    // Explicit clone, matching the other parameterized Bundles in this file,
    // so this Bundle can also be wrapped directly in Wire(...) / IO(...).
    override def cloneType = (new PartialRFControl(c)).asInstanceOf[this.type]

    // Per-port write and read enables.
    val wEnable = Vec(c.numInputs, Bool())
    val rEnable = Vec(c.numOutputs, Bool())

    // Per-port addresses; absent under Firm bypass.
    val wAddr = if (!c.bpFirm) Some(Vec(c.numInputs, UInt(c.addrWidth.W))) else None
    val rAddr = if (!c.bpFirm) Some(Vec(c.numOutputs, UInt(c.addrWidth.W))) else None

    // Each output can select which input of the opposite bus to bypass from
    val bpSel = if (c.bpSoft || c.bpFirm) Some(Vec(c.numOutputs, Vec(c.numCrossInputs, Bool()))) else None
}

/**
 * Static configuration of a register file with an internal and an external side.
 *
 * @param numIntInputs  number of internal write ports
 * @param numExtInputs  number of external write ports
 * @param numIntOutputs number of internal read ports
 * @param numExtOutputs number of external read ports
 * @param addrWidth     address bit width (must be 0 for "Firm" bypass)
 * @param dataWidth     data bit width (must be positive)
 * @param bpType        bypass type: "None", "Soft" or "Firm"
 */
class RFConfig(
        val numIntInputs: Int,
        val numExtInputs: Int,
        val numIntOutputs: Int,
        val numExtOutputs: Int,
        val addrWidth: Int,
        val dataWidth: Int,
        val bpType: String) {

    val bpNone = (bpType == "None")
    val bpSoft = (bpType == "Soft")
    val bpFirm = (bpType == "Firm")

    require(bpNone || bpSoft || bpFirm, "Invalid Bypass type.\n")
    require(numIntInputs > 0 || numExtInputs > 0, "Must have at least one input.\n")
    require(numIntOutputs > 0 || numExtOutputs > 0, "Must have at least one output.\n")
    require(dataWidth > 0, "Data bitwidth must be at least one.\n")
    if (bpFirm) { require(addrWidth == 0, "Address width must be 0 when Firm Bypassing.\n") }

    // The "at least one" checks above use ||, so a negative count on one side
    // would otherwise slip through and only fail later while building Vecs.
    require(
        numIntInputs >= 0 && numExtInputs >= 0 && numIntOutputs >= 0 && numExtOutputs >= 0,
        "Port counts must be non-negative.\n")
    require(addrWidth >= 0, "Address width must be non-negative.\n")

    // A read port bypasses from the write ports of the OPPOSITE side, so the
    // bypass-select width of each side is the opposite side's input count.
    val intConfig = new PartialRFConfig(
        numIntInputs, numIntOutputs, numExtInputs, addrWidth, bpSoft, bpFirm)

    val extConfig = new PartialRFConfig(
        numExtInputs, numExtOutputs, numIntInputs, addrWidth, bpSoft, bpFirm)
}

/**
 * Complete control bundle for a register file: one optional sub-bundle per
 * side. A side is omitted when it has neither write nor read ports.
 */
class RFControl(c: RFConfig) extends Bundle {

    override def cloneType = (new RFControl(c)).asInstanceOf[this.type]

    // Internal-side controls; present when that side has at least one port.
    val internal =
        if (c.numIntInputs <= 0 && c.numIntOutputs <= 0) None
        else Some(new PartialRFControl(c.intConfig))

    // External-side controls; present when that side has at least one port.
    val external =
        if (c.numExtInputs <= 0 && c.numExtOutputs <= 0) None
        else Some(new PartialRFControl(c.extConfig))
}

/**
 * Register file with separate internal and external port groups and optional
 * cross-side bypassing.
 *
 *  - "None": reads come from the addressed storage register.
 *  - "Soft": a read returns the bypass register chosen by its bpSel vector when
 *            any select bit is set, otherwise the addressed storage register.
 *  - "Firm": there is no storage; reads always come from the bypass registers.
 *
 * A disabled read port drives 0. When bypassing is configured but the opposite
 * side has no write ports (so no bypass register exists), a Soft read falls
 * back to storage and a Firm read drives 0 instead of failing at elaboration.
 */
class RF(c: RFConfig) extends Module {

    val io = IO(new Bundle {
        val control = Input(new RFControl(c))
        val wInternal = Input(Vec(c.numIntInputs, SInt(c.dataWidth.W)))
        val wExternal = Input(Vec(c.numExtInputs, SInt(c.dataWidth.W)))
        val rInternal = Output(Vec(c.numIntOutputs, SInt(c.dataWidth.W)))
        val rExternal = Output(Vec(c.numExtOutputs, SInt(c.dataWidth.W)))
    })

    // Addressable storage; not instantiated under Firm bypass.
    val dataRegister = if (!c.bpFirm)
        Some(RegInit(Vec.fill(pow(2, c.addrWidth).toInt){0.S(c.dataWidth.W)})) else None

    // Need to bypass through a register to prevent combinational loops
    val bpAny = c.bpSoft || c.bpFirm
    val bpRegisterInt = if (bpAny && c.numIntInputs > 0)
        Some(RegInit(Vec.fill(c.numIntInputs){0.S(c.dataWidth.W)})) else None
    val bpRegisterExt = if (bpAny && c.numExtInputs > 0)
        Some(RegInit(Vec.fill(c.numExtInputs){0.S(c.dataWidth.W)})) else None

    // Wires one side's write ports: an enabled write updates the addressed
    // storage entry (when storage exists) and that port's bypass register
    // (when bypassing exists).
    private def connectWrites(
            ctrl: Option[PartialRFControl],
            wData: Vec[SInt],
            bpRegister: Option[Vec[SInt]]): Unit = ctrl.foreach { pc =>
        for (i <- 0 until wData.length) {
            when (pc.wEnable(i)) {
                for (mem <- dataRegister; addr <- pc.wAddr) { mem(addr(i)) := wData(i) }
                bpRegister.foreach { reg => reg(i) := wData(i) }
            }
        }
    }

    // Wires one side's read ports. `crossBypass` holds the bypass registers of
    // the OPPOSITE side's write ports and is None when that side has none.
    private def connectReads(
            ctrl: Option[PartialRFControl],
            rData: Vec[SInt],
            crossBypass: Option[Vec[SInt]]): Unit = ctrl.foreach { pc =>
        for (i <- 0 until rData.length) {
            when (pc.rEnable(i)) {
                // Value read from storage, if storage exists.
                val stored = for (mem <- dataRegister; addr <- pc.rAddr) yield mem(addr(i))
                // Select vector and bypassed value, if there is anything to bypass from.
                val bypass = for {
                    sel <- pc.bpSel.map(_.apply(i))
                    regs <- crossBypass
                    if sel.length > 0
                } yield (sel, PriorityMux(sel, regs))

                (bypass, stored) match {
                    case (Some((sel, bypassed)), Some(fromMem)) =>
                        // Soft: bypass only when some select bit is asserted.
                        when (sel.contains(true.B)) {
                            rData(i) := bypassed
                        } .otherwise {
                            rData(i) := fromMem
                        }
                    case (Some((_, bypassed)), None) =>
                        // Firm: the bypass register is the only data source.
                        rData(i) := bypassed
                    case (None, Some(fromMem)) =>
                        // No bypass configured, or nothing to bypass from.
                        rData(i) := fromMem
                    case (None, None) =>
                        // Firm bypass with no opposite-side write ports.
                        rData(i) := 0.S
                }
            } .otherwise {
                rData(i) := 0.S
            }
        }
    }

    connectWrites(io.control.internal, io.wInternal, bpRegisterInt)
    connectWrites(io.control.external, io.wExternal, bpRegisterExt)

    // External writes bypass to internal reads, and vice versa.
    connectReads(io.control.internal, io.rInternal, bpRegisterExt)
    connectReads(io.control.external, io.rExternal, bpRegisterInt)
}

defined [32mclass[39m [36mPartialRFConfig[39m
defined [32mclass[39m [36mPartialRFControl[39m
defined [32mclass[39m [36mRFConfig[39m
defined [32mclass[39m [36mRFControl[39m
defined [32mclass[39m [36mRF[39m

##### Verification

In [4]:
/*
Basic Test Checklist:
[-] Optional Hardware
    [-] No Internal Read Port
    [-] No External Read Port
    [-] No Internal Write Port
    [-] No External Write Port

[-] No Bypass
    [-] Standard Read/Write
    [-] Port Independence
    [-] Read Enable
    [-] Write Enable

[-] Soft Bypass
    [-] Standard Read/Write
    [-] Port Independence
    [-] Read Enable
    [-] Write Enable
    [-] Bypass Enable/Select

[-] Firm Bypass (called "Hard" in the identifiers below; the bpType string is "Firm")
    [-] Bypass Enable/Select

Better would be to check these together.
Even better would be to use Golden Model...
*/

// RFConfig arguments, in order: numIntInputs, numExtInputs, numIntOutputs,
// numExtOutputs, addrWidth, dataWidth, bpType.
// TODO: Do this. Every tester body below is still empty, so each Driver run
// only elaborates the design and pokes/expects nothing.

// One port group removed at a time, no bypass.
val exRFConfigNoIntWrite = new RFConfig(0, 2, 2, 2, 4, 8, "None")
val exRFConfigNoExtWrite = new RFConfig(2, 0, 2, 2, 4, 8, "None")
val exRFConfigNoIntRead = new RFConfig(2, 2, 0, 2, 4, 8, "None")
val exRFConfigNoExtRead = new RFConfig(2, 2, 2, 0, 4, 8, "None")

// Fully populated register files, one per bypass type.
// The Firm configuration uses addrWidth 0, as RFConfig requires.
val exRFConfigNoBypass = new RFConfig(2, 2, 2, 2, 4, 8, "None")
val exRFConfigSoftBypass = new RFConfig(2, 2, 2, 2, 4, 8, "Soft")
val exRFConfigHardBypass = new RFConfig(2, 2, 2, 2, 0, 8, "Firm")

val noIntWriteTest = Driver(() => new RF(exRFConfigNoIntWrite)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

val noExtWriteTest = Driver(() => new RF(exRFConfigNoExtWrite)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

val noIntReadTest = Driver(() => new RF(exRFConfigNoIntRead)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

val noExtReadTest = Driver(() => new RF(exRFConfigNoExtRead)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

val noBypassTest = Driver(() => new RF(exRFConfigNoBypass)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

val softBypassTest = Driver(() => new RF(exRFConfigSoftBypass)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

val hardBypassTest = Driver(() => new RF(exRFConfigHardBypass)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}
                          

[[35minfo[0m] [0.002] Elaborating design...
[[35minfo[0m] [0.140] Done elaborating.
Total FIRRTL Compile Time: 389.8 ms
Total FIRRTL Compile Time: 142.1 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.001] SEED 1533145908508
test cmd2WrapperHelperRF Success: 0 tests passed in 5 cycles taking 0.028597 seconds
[[35minfo[0m] [0.001] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.011] Done elaborating.
Total FIRRTL Compile Time: 101.4 ms
Total FIRRTL Compile Time: 95.1 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533145909827
test cmd2WrapperHelperRF Success: 0 tests passed in 5 cycles taking 0.007759 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.008] Done elaborating.
Total FIRRTL Compile Time: 77.2 ms
Total FIRRTL Compile Time: 139.2 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 15331459100

[36mexRFConfigNoIntWrite[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@4e6a39cb
[36mexRFConfigNoExtWrite[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@27a7598b
[36mexRFConfigNoIntRead[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@6ed97da3
[36mexRFConfigNoExtRead[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@11fb4adb
[36mexRFConfigNoBypass[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@ec08d4e
[36mexRFConfigSoftBypass[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@1336f61c
[36mexRFConfigHardBypass[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@36a7a5bc
[36mnoIntWriteTest[39m: [32mBoolean[39m = [32mtrue[39m
[36mnoExtWriteTest[39m: [32mBoolean[39m = [32mtrue[39m
[36mnoIntReadTest[39m: [32mBoolean[39m = [32mtrue[39m
[36mnoExtReadTest[39m: [32mBoolean[39m = [32mtrue[39m
[36mnoBypassTest[39m: [32mBoolean[39m = [32mtrue[39m
[36msoftBypassTest

## Inner Product Unit

##### Definition

In [5]:
/**
 * Static configuration of an inner product unit.
 *
 * @param width       number of weight/activation pairs (at least one)
 * @param inBitWidth  bit width of each input operand (positive)
 * @param outBitWidth bit width of the outputs (positive)
 * @param bpType      bypass type: "None" or "Firm"
 */
class IPUConfig(
        val width: Int,
        val inBitWidth: Int,
        val outBitWidth: Int,
        val bpType: String) {

    require(width >= 1, "Width must be at least one.\n")
    require(bpType == "None" || bpType == "Firm", "Bypass must be \"None\" or \"Firm\".\n")
    require(inBitWidth > 0 && outBitWidth > 0, "Data bitwidth must be greater than 0\n")

    // True when the unit also exposes bypassed weight/activation operands.
    val bpFirm = bpType == "Firm"
}

/**
 * Output bundle of the inner product unit: the inner product itself plus,
 * when `bp` is set, one bypassed weight and one bypassed activation operand.
 */
class IPUOutput(outBitWidth: Int, bp: Boolean) extends Bundle {

    override def cloneType = (new IPUOutput(outBitWidth, bp)).asInstanceOf[this.type]

    val innerProd = SInt(outBitWidth.W)

    // The bypass fields use the output bit width so that all three fields
    // of the bundle are the same size.
    val bpWeight = if (!bp) None else Some(SInt(outBitWidth.W))
    val bpActvtn = if (!bp) None else Some(SInt(outBitWidth.W))
}


/**
 * Inner product unit: multiplies the weight and activation vectors element by
 * element and sums the products. Under "Firm" bypass it additionally forwards
 * one weight and one activation operand, chosen by `bpSel`.
 */
class IPU(c: IPUConfig) extends Module {

    val io = IO(new Bundle {
        val bpSel = if (c.bpFirm) Some(Input(Vec(c.width, Bool()))) else None
        val weightIn = Input(Vec(c.width, SInt(c.inBitWidth.W)))
        val actvtnIn = Input(Vec(c.width, SInt(c.inBitWidth.W)))
        val out = Output(new IPUOutput(c.outBitWidth, c.bpFirm))
    })

    // Element-wise multiplier.
    private class PMult extends Module {
        val io = IO(new Bundle {
            val weightVec = Input(Vec(c.width, SInt(c.inBitWidth.W)))
            val actvtnVec = Input(Vec(c.width, SInt(c.inBitWidth.W)))
            val pairwiseProd = Output(Vec(c.width, SInt(c.outBitWidth.W)))
        })
        io.pairwiseProd := io.weightVec.zip(io.actvtnVec).map { case (weight, actvtn) => weight * actvtn }
    }

    // Sums all of its inputs.
    private class SumTree extends Module {
        val io = IO(new Bundle {
            val inVec = Input(Vec(c.width, SInt(c.outBitWidth.W)))
            val sum = Output(SInt(c.outBitWidth.W))
        })

        // Combines adjacent pairs level by level until one value remains,
        // passing an unpaired trailing element up to the next level unchanged.
        private def adjReduce[A](xs: List[A], op: (A, A) => A): A = xs match {
            case List(only) => only
            case _ =>
                val nextLevel = xs.grouped(2).map {
                    case Seq(left, right) => op(left, right)
                    case Seq(unpaired) => unpaired
                }.toList
                adjReduce(nextLevel, op)
        }

        io.sum := adjReduce(io.inVec.toList, (lhs: SInt, rhs: SInt) => lhs + rhs)
    }

    private val pMult = Module(new PMult)
    pMult.io.weightVec := io.weightIn
    pMult.io.actvtnVec := io.actvtnIn

    private val sumTree = Module(new SumTree)
    sumTree.io.inVec := pMult.io.pairwiseProd

    io.out.innerProd := sumTree.io.sum

    // bpSel and both bypass outputs exist exactly when c.bpFirm is set.
    io.bpSel.foreach { sel =>
        io.out.bpWeight.foreach { port => port := PriorityMux(sel, io.weightIn) }
        io.out.bpActvtn.foreach { port => port := PriorityMux(sel, io.actvtnIn) }
    }
}

defined [32mclass[39m [36mIPUConfig[39m
defined [32mclass[39m [36mIPUOutput[39m
defined [32mclass[39m [36mIPU[39m

##### Verification

In [6]:
/*
Basic Test Checklist:
[-] Inner Products

[-] None Bypass

[-] Firm Bypass
    [-] Bypass Select

[-] Differing bitwidths
    [-] Small -> Large
    [-] Large -> Small
    [-] Bypass bitwidth

Better would be to check these together.
Even better would be to use Golden Model...
*/

// IPUConfig arguments, in order: width, inBitWidth, outBitWidth, bpType.
val exIPUConfigNoBypass = new IPUConfig(4, 8, 8, "None")
val exIPUConfigFirmBypass = new IPUConfig(4, 8, 8, "Firm")
// Input narrower than output (4 -> 8 bits), then wider than output (8 -> 4 bits).
val exIPUConfigDiffBW1 = new IPUConfig(4, 4, 8, "None")
val exIPUConfigDiffBW2 = new IPUConfig(4, 8, 4, "None")

// Every tester body below is still empty, so each Driver run only elaborates
// the design and pokes/expects nothing.
Driver(() => new IPU(exIPUConfigNoBypass)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

Driver(() => new IPU(exIPUConfigFirmBypass)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

Driver(() => new IPU(exIPUConfigDiffBW1)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

Driver(() => new IPU(exIPUConfigDiffBW2)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.023] Done elaborating.
Total FIRRTL Compile Time: 37.0 ms
Total FIRRTL Compile Time: 22.2 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533145913161
test cmd4WrapperHelperIPU Success: 0 tests passed in 5 cycles taking 0.004181 seconds
[[35minfo[0m] [0.001] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.005] Done elaborating.
Total FIRRTL Compile Time: 32.0 ms
Total FIRRTL Compile Time: 26.2 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533145913278
test cmd4WrapperHelperIPU Success: 0 tests passed in 5 cycles taking 0.004954 seconds
[[35minfo[0m] [0.001] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.006] Done elaborating.
Total FIRRTL Compile Time: 23.4 ms
Total FIRRTL Compile Time: 20.8 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533145913363

[36mexIPUConfigNoBypass[39m: [32mIPUConfig[39m = $sess.cmd4Wrapper$Helper$IPUConfig@190e39ae
[36mexIPUConfigFirmBypass[39m: [32mIPUConfig[39m = $sess.cmd4Wrapper$Helper$IPUConfig@2dd49bd
[36mexIPUConfigDiffBW1[39m: [32mIPUConfig[39m = $sess.cmd4Wrapper$Helper$IPUConfig@79ce514
[36mexIPUConfigDiffBW2[39m: [32mIPUConfig[39m = $sess.cmd4Wrapper$Helper$IPUConfig@de2e7fb
[36mres5_4[39m: [32mBoolean[39m = [32mtrue[39m
[36mres5_5[39m: [32mBoolean[39m = [32mtrue[39m
[36mres5_6[39m: [32mBoolean[39m = [32mtrue[39m
[36mres5_7[39m: [32mBoolean[39m = [32mtrue[39m

## ALU

#### Definition

In [7]:
/**
 * Static configuration of the ALU.
 *
 * @param dataWidth bit width of the ALU data path
 * @param funcs     supported functions; a non-empty list drawn from
 *                  "Identity", "Add", "Max" and "Accumulate"
 */
class ALUConfig(val dataWidth: Int, val funcs: List[String]) {

    require(funcs.nonEmpty, "Must support at least one function.")
    funcs.foreach { name =>
        require(
            name == "Identity" || name == "Add" || name == "Max" || name == "Accumulate",
            "Unsupported function.")
    }

    // One flag per function, set when that function was requested.
    val idnSupp = funcs.exists(_ == "Identity")
    val addSupp = funcs.exists(_ == "Add")
    val maxSupp = funcs.exists(_ == "Max")
    val accSupp = funcs.exists(_ == "Accumulate")

    // Add and Max both operate on the bypassed weight/activation operands.
    val addBypassIn = addSupp || maxSupp
}

/**
 * Function-select bundle for the ALU: one optional enable per function,
 * present only for functions the configuration supports.
 */
class ALUFSel(c: ALUConfig) extends Bundle {

    override def cloneType = (new ALUFSel(c)).asInstanceOf[this.type]

    // Priority is given from top to bottom
    val idnEnable = if (!c.idnSupp) None else Some(Bool())
    val addEnable = if (!c.addSupp) None else Some(Bool())
    val maxEnable = if (!c.maxSupp) None else Some(Bool())
    val accEnable = if (!c.accSupp) None else Some(Bool())
}

/**
 * ALU fed by the inner product unit. The first asserted enable, in the order
 * Identity, Add, Max, Accumulate, decides the output; with none asserted the
 * output is 0.
 *
 *  - Identity:   the inner product
 *  - Add:        bypassed weight + bypassed activation
 *  - Max:        the larger of bypassed weight and bypassed activation
 *  - Accumulate: inner product + the register file operand
 */
class ALU(c: ALUConfig) extends Module {

    val io = IO(new Bundle {
        val fSel = Input(new ALUFSel(c))
        val ipu = Input(new IPUOutput(c.dataWidth, c.addBypassIn))
        val rf = if (c.accSupp) Some(Input(SInt(c.dataWidth.W))) else None
        val out = Output(SInt(c.dataWidth.W))
    })

    // An enable that is not part of the configuration reads as never asserted.
    private def asserted(enable: Option[Bool]): Bool = enable.getOrElse(false.B)

    // The fallbacks to 0 on the operands are logically unnecessary (the
    // matching enable cannot be asserted when the operand is absent), but
    // every branch still has to elaborate.
    when (asserted(io.fSel.idnEnable)) {
        io.out := io.ipu.innerProd
    } .elsewhen (asserted(io.fSel.addEnable)) {
        io.out := io.ipu.bpWeight.getOrElse(0.S) + io.ipu.bpActvtn.getOrElse(0.S)
    } .elsewhen (asserted(io.fSel.maxEnable)) {
        val weight = io.ipu.bpWeight.getOrElse(0.S)
        val actvtn = io.ipu.bpActvtn.getOrElse(0.S)
        io.out := Mux(weight > actvtn, weight, actvtn)
    } .elsewhen (asserted(io.fSel.accEnable)) {
        io.out := io.ipu.innerProd + io.rf.getOrElse(0.S)
    } .otherwise {
        io.out := 0.S
    }
}

defined [32mclass[39m [36mALUConfig[39m
defined [32mclass[39m [36mALUFSel[39m
defined [32mclass[39m [36mALU[39m

#### Verification

In [8]:
/*
Basic Test Checklist:
[-] Alone
    [-] Identity
    [-] Add
    [-] Max
    [-] Accumulate

[-] All functions together
    [-] Identity
    [-] Add
    [-] Max
    [-] Accumulate
    [-] Priority

Better would be to check these together.
Even better would be to use Golden Model...
*/

// ALUConfig arguments, in order: dataWidth, funcs.
// One configuration per single function, plus one supporting all four.
val exALUConfigIdn = new ALUConfig(8, List("Identity"))
val exALUConfigAdd = new ALUConfig(8, List("Add"))
val exALUConfigMax = new ALUConfig(8, List("Max"))
val exALUConfigAcc = new ALUConfig(8, List("Accumulate"))
val exALUConfigAll = new ALUConfig(8, List("Identity", "Add", "Max", "Accumulate"))


// Every tester body below is still empty, so each Driver run only elaborates
// the design and pokes/expects nothing.
Driver(() => new ALU(exALUConfigIdn)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

Driver(() => new ALU(exALUConfigAdd)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

Driver(() => new ALU(exALUConfigMax)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

Driver(() => new ALU(exALUConfigAcc)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

Driver(() => new ALU(exALUConfigAll)) {
    uut => new PeekPokeTester(uut) {
        // TODO
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.013] Done elaborating.
Total FIRRTL Compile Time: 11.0 ms
Total FIRRTL Compile Time: 8.0 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533145914994
test cmd6WrapperHelperALU Success: 0 tests passed in 5 cycles taking 0.000930 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.003] Done elaborating.
Total FIRRTL Compile Time: 11.0 ms
Total FIRRTL Compile Time: 6.7 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533145915040
test cmd6WrapperHelperALU Success: 0 tests passed in 5 cycles taking 0.001571 seconds
[[35minfo[0m] [0.001] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.002] Done elaborating.
Total FIRRTL Compile Time: 9.6 ms
Total FIRRTL Compile Time: 8.9 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.001] SEED 1533145915075
tes

[36mexALUConfigIdn[39m: [32mALUConfig[39m = $sess.cmd6Wrapper$Helper$ALUConfig@7068b3a5
[36mexALUConfigAdd[39m: [32mALUConfig[39m = $sess.cmd6Wrapper$Helper$ALUConfig@37374b45
[36mexALUConfigMax[39m: [32mALUConfig[39m = $sess.cmd6Wrapper$Helper$ALUConfig@417e684e
[36mexALUConfigAcc[39m: [32mALUConfig[39m = $sess.cmd6Wrapper$Helper$ALUConfig@65e68deb
[36mexALUConfigAll[39m: [32mALUConfig[39m = $sess.cmd6Wrapper$Helper$ALUConfig@5026ef69
[36mres7_5[39m: [32mBoolean[39m = [32mtrue[39m
[36mres7_6[39m: [32mBoolean[39m = [32mtrue[39m
[36mres7_7[39m: [32mBoolean[39m = [32mtrue[39m
[36mres7_8[39m: [32mBoolean[39m = [32mtrue[39m
[36mres7_9[39m: [32mBoolean[39m = [32mtrue[39m

## Nonlinear Unit

In [9]:
/**
 * Static configuration of the nonlinear unit.
 *
 * @param inBitWidth  bit width of the input
 * @param outBitWidth bit width of the output
 * @param funcs       supported functions; each must be "Identity" or "ReLu"
 */
class NLUConfig(val inBitWidth: Int, val outBitWidth: Int, val funcs: List[String]) {

    funcs.foreach { name =>
        require(name == "Identity" || name == "ReLu", "Unsupported Function")
    }

    val idSupp = funcs.exists(_ == "Identity")
    val reluSupp = funcs.exists(_ == "ReLu")

    // Not implemented yet, so these are hard-wired off.
    val tanhSupp = false //funcs.contains("tanh")
    val sinhSupp = false //funcs.contains("sinh")
}

/**
 * Function-select bundle for the nonlinear unit: one optional enable per
 * function, present only for functions the configuration supports.
 */
class NLUFSel(c: NLUConfig) extends Bundle {

    override def cloneType = (new NLUFSel(c)).asInstanceOf[this.type]

    val idEnable = if (!c.idSupp) None else Some(Bool())
    val reluEnable = if (!c.reluSupp) None else Some(Bool())
    val tanhEnable = if (!c.tanhSupp) None else Some(Bool())
    val sinhEnable = if (!c.sinhSupp) None else Some(Bool())
}

/**
 * Nonlinear unit. The first asserted enable, in the order Identity, ReLu,
 * tanh, sinh, decides the output; with none asserted the output is 0.
 *
 *  - Identity: the input unchanged
 *  - ReLu:     the input when it is positive, otherwise 0
 *  - tanh/sinh: not implemented yet; both drive 0
 */
class NLU(c: NLUConfig) extends Module {

    val io = IO(new Bundle {
        val fSel = Input(new NLUFSel(c))
        val in = Input(SInt(c.inBitWidth.W))
        val out = Output(SInt(c.outBitWidth.W))
    })

    when (io.fSel.idEnable.getOrElse(false.B)) {
        io.out := io.in
    } .elsewhen (io.fSel.reluEnable.getOrElse(false.B)) {
        // io.in is a plain SInt port, so it is compared and forwarded
        // directly, exactly as the identity branch does.
        when (io.in > 0.S) {
            io.out := io.in
        } .otherwise {
            io.out := 0.S
        }
    } .elsewhen (io.fSel.tanhEnable.getOrElse(false.B)) {
        // TODO
        io.out := 0.S
    } .elsewhen (io.fSel.sinhEnable.getOrElse(false.B)) {
        // TODO
        io.out := 0.S
    } .otherwise {
        io.out := 0.S
    }
}

defined [32mclass[39m [36mNLUConfig[39m
defined [32mclass[39m [36mNLUFSel[39m
defined [32mclass[39m [36mNLU[39m

In [10]:
/*
Basic Test Checklist:
[ ] Alone
    [ ] Identity
    [ ] ReLu
    [ ] tanh (not accepted by NLUConfig yet)
    [ ] sinh (not accepted by NLUConfig yet)

[ ] All functions together
    [ ] Identity works
    [ ] ReLu works
    [ ] tanh works
    [ ] sinh works
    [ ] Correct Priority

Better would be to check these together.
Even better would be to use Golden Model...
*/

// NLUConfig arguments, in order: inBitWidth, outBitWidth, funcs.
val exNLUConfigId = new NLUConfig(8, 8, List("Identity"))
val exNLUConfigReLu = new NLUConfig(8, 8, List("ReLu"))
val exNLUConfigIdReLu = new NLUConfig(8, 8, List("Identity", "ReLu"))

// Every tester body below is empty, so each Driver run only elaborates the
// design and pokes/expects nothing.
Driver(() => new NLU(exNLUConfigId)) {
    uut => new PeekPokeTester(uut) {

    }
}

Driver(() => new NLU(exNLUConfigReLu)) {
    uut => new PeekPokeTester(uut) {

    }
}

Driver(() => new NLU(exNLUConfigIdReLu)) {
    uut => new PeekPokeTester(uut) {

    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.010] Done elaborating.
Total FIRRTL Compile Time: 7.1 ms
Total FIRRTL Compile Time: 5.9 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533145916419
test cmd8WrapperHelperNLU Success: 0 tests passed in 5 cycles taking 0.001226 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.002] Done elaborating.
Total FIRRTL Compile Time: 5.9 ms
Total FIRRTL Compile Time: 6.2 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533145916456
test cmd8WrapperHelperNLU Success: 0 tests passed in 5 cycles taking 0.000861 seconds
[[35minfo[0m] [0.001] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.002] Done elaborating.
Total FIRRTL Compile Time: 7.1 ms
Total FIRRTL Compile Time: 5.9 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533145916481
test 

[36mexNLUConfigId[39m: [32mNLUConfig[39m = $sess.cmd8Wrapper$Helper$NLUConfig@505b1c7d
[36mexNLUConfigReLu[39m: [32mNLUConfig[39m = $sess.cmd8Wrapper$Helper$NLUConfig@1f3456b8
[36mexNLUConfigIdReLu[39m: [32mNLUConfig[39m = $sess.cmd8Wrapper$Helper$NLUConfig@7733fa61
[36mres9_3[39m: [32mBoolean[39m = [32mtrue[39m
[36mres9_4[39m: [32mBoolean[39m = [32mtrue[39m
[36mres9_5[39m: [32mBoolean[39m = [32mtrue[39m

## Control

### State Machine

#### Definition

In [11]:
/**
 * Static configuration of a state machine.
 *
 * @param numStates   number of states; sizes the state register
 * @param numCtrlSigs sizes the control input (its width is log2Up of this value)
 * @param stateMap    builds the next-state logic from the current state, the
 *                    control input and this configuration
 */
class StateMachineConfig(
        val numStates: Int,
        val numCtrlSigs: Int,
        val stateMap: (UInt, UInt, StateMachineConfig) => UInt) {

    // Bit width of the state register and of the state output.
    val stateWidth: Int = log2Up(numStates)

    // Bit width of the control input.
    val ctrlWidth: Int = log2Up(numCtrlSigs)
}

/**
 * State machine whose next-state logic is supplied by the configuration.
 * The state register resets to 0 and is driven straight to the output.
 */
class StateMachine(c: StateMachineConfig) extends Module {

    val io = IO(new Bundle {
        val control = Input(UInt(c.ctrlWidth.W))
        val out = Output(UInt(c.stateWidth.W))
    })

    // Current state.
    val register = RegInit(0.U(c.stateWidth.W))

    io.out := register

    // The next state is whatever the configured map builds from the current
    // state and the control input.
    register := c.stateMap(register, io.control, c)
}

defined [32mclass[39m [36mStateMachineConfig[39m
defined [32mclass[39m [36mStateMachine[39m

#### Example

In [12]:
// Example next-state logic for a two-state machine: from state 0 or 1 the next
// state equals the control value (0 or 1); every other combination leads to
// state 0.
def exampleStateMap(state: UInt, control: UInt, c: StateMachineConfig): UInt = {

    val nextState = Wire(UInt(c.stateWidth.W))

    // (current state, control) -> next state; the first matching row wins.
    val transitions = Seq(
        (0, 0) -> 0,
        (0, 1) -> 1,
        (1, 0) -> 0,
        (1, 1) -> 1)

    nextState := MuxCase(0.U, transitions.map { case ((from, ctrl), to) =>
        (state === from.U && control === ctrl.U) -> to.U
    })

    nextState
}

defined [32mfunction[39m [36mexampleStateMap[39m

#### Verification

In [13]:
/*
Basic Test Checklist:
[x] Correct State Transitions

Better would be to check these together.
Even better would be to use Golden Model...
*/

// StateMachineConfig arguments, in order: numStates, numCtrlSigs, stateMap.
val exampleStateMachineConfig = new StateMachineConfig(2, 2, exampleStateMap)

// Walks through all four transitions of the example map. The state register
// updates on step(1), so each expect checks the state reached by the control
// value poked just before that step.
Driver(() => new StateMachine(exampleStateMachineConfig)) {
    uut => new PeekPokeTester(uut) {
        // State right after reset.
        poke(uut.io.control, 0)
        expect(uut.io.out, 0)
        
        // 0 -> 1
        poke(uut.io.control, 1)
        step(1)
        expect(uut.io.out, 1)
        
        // 1 -> 1
        poke(uut.io.control, 1)
        step(1)
        expect(uut.io.out, 1)
        
        // 1 -> 0
        poke(uut.io.control, 0)
        step(1)
        expect(uut.io.out, 0)
        
        // 0 -> 0
        poke(uut.io.control, 0)
        step(1)
        expect(uut.io.out, 0)
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.006] Done elaborating.
Total FIRRTL Compile Time: 11.6 ms
Total FIRRTL Compile Time: 8.2 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533145917825
test cmd10WrapperHelperStateMachine Success: 5 tests passed in 9 cycles taking 0.004959 seconds
[[35minfo[0m] [0.004] RAN 4 CYCLES PASSED


[36mexampleStateMachineConfig[39m: [32mStateMachineConfig[39m = $sess.cmd10Wrapper$Helper$StateMachineConfig@1f052b62
[36mres12_1[39m: [32mBoolean[39m = [32mtrue[39m

### Decoder

#### Definition

In [14]:
/**
 * Top-level configuration of a processing element: the configuration of every
 * sub-unit plus one decode function per controlled unit. Each decode function
 * receives the current state and that unit's configuration and returns the
 * control signals for the unit.
 *
 * Construction fails when the IPU width differs from the number of internal
 * read ports of the weight or activation register file, or when the IPU uses
 * Firm bypass but the ALU supports neither Add nor Max.
 */
class PEConfig(
        val weightRFConfig: RFConfig,
        val actvtnRFConfig: RFConfig,
        val scratchRFConfig: RFConfig,
        val ipuConfig: IPUConfig,
        val aluConfig: ALUConfig,
        val nluConfig: NLUConfig,
        val smConfig: StateMachineConfig,
        val decodeWeightRF: (UInt, RFConfig) => Data,
        val decodeActvtnRF: (UInt, RFConfig) => Data,
        val decodeScratchRF: (UInt, RFConfig) => Data,
        val decodeIPU: (UInt, IPUConfig) => Data,
        val decodeALU: (UInt, ALUConfig) => Data,
        val decodeNLU: (UInt, NLUConfig) => Data) {

    require(
        weightRFConfig.numIntOutputs == ipuConfig.width,
        "IPU input width not equal to Weight RF Internal Output width.\n")
    require(
        actvtnRFConfig.numIntOutputs == ipuConfig.width,
        "IPU input width not equal to Activation RF Internal Output width.\n")

    // Only relevant under Firm bypass: the bypassed operands need an ALU
    // function (Add or Max) that consumes them.
    require(
        !ipuConfig.bpFirm || aluConfig.addSupp || aluConfig.maxSupp,
        "Incompatible ALU and IPU Configurations")
}

/**
 * Control signals for the three register files of a processing element:
 * weight, activation and scratch.
 */
class MemoryControl(c: PEConfig) extends Bundle {

    override def cloneType = (new MemoryControl(c)).asInstanceOf[this.type]

    val weightRF: RFControl = new RFControl(c.weightRFConfig)
    val actvtnRF: RFControl = new RFControl(c.actvtnRFConfig)
    val scratchRF: RFControl = new RFControl(c.scratchRFConfig)
}

/**
 * Control signals for the compute units of a processing element: the ALU and
 * NLU function selects and, under Firm IPU bypass, the IPU bypass select.
 */
class ProcessControl(c: PEConfig) extends Bundle {

    override def cloneType = (new ProcessControl(c)).asInstanceOf[this.type]

    val aluFSel = Output(new ALUFSel(c.aluConfig))
    val nluFSel = Output(new NLUFSel(c.nluConfig))

    // One select bit per IPU lane; absent unless the IPU uses Firm bypass.
    val ipuBpSel =
        if (!c.ipuConfig.bpFirm) None
        else Some(Output(Vec(c.ipuConfig.width, Bool())))
}

/**
 * Turns the current state into the control signals of every unit by calling
 * the decode functions supplied in the configuration.
 */
class Decoder(c: PEConfig) extends Module {

    val io = IO(new Bundle {
        val state = Input(UInt(c.smConfig.stateWidth.W))
        val mem = Output(new MemoryControl(c))
        val proc = Output(new ProcessControl(c))
    })

    // Register file controls.
    io.mem.weightRF <> c.decodeWeightRF(io.state, c.weightRFConfig)
    io.mem.actvtnRF <> c.decodeActvtnRF(io.state, c.actvtnRFConfig)
    io.mem.scratchRF <> c.decodeScratchRF(io.state, c.scratchRFConfig)

    // The IPU bypass select exists only under Firm bypass, so the IPU
    // decoder is only invoked in that case.
    io.proc.ipuBpSel.foreach { sel => sel := c.decodeIPU(io.state, c.ipuConfig) }

    // Function selects.
    io.proc.aluFSel <> c.decodeALU(io.state, c.aluConfig)
    io.proc.nluFSel <> c.decodeNLU(io.state, c.nluConfig)
}

defined [32mclass[39m [36mPEConfig[39m
defined [32mclass[39m [36mMemoryControl[39m
defined [32mclass[39m [36mProcessControl[39m
defined [32mclass[39m [36mDecoder[39m

#### Example

In [15]:
// Example register file decoder.
// In state 0 every port is enabled, every bypass-select bit is set, and the
// addresses are 1 (internal write), 2 (internal read), 3 (external write) and
// 4 (external read). In any other state everything is disabled and the
// addresses are 5, 6, 7 and 8 in the same order.
// Address and bypass-select signals are driven only when the configuration
// actually has them, so this works for "None", "Soft" and "Firm" bypass.
val exampleDecodeWeightRF = (state: UInt, c: RFConfig) => {

    val data = Wire(new RFControl(c))

    // Drives one side of the control bundle with constant values.
    // Can't call Vec.fill on size 0, hence the port-count guards.
    // You shouldn't be doing anything like this since you know
    // the shape of the RFs in advance.
    def drive(
            side: Option[PartialRFControl],
            pc: PartialRFConfig,
            enable: Boolean,
            wAddr: Int,
            rAddr: Int): Unit = side.foreach { ctrl =>

        if (pc.numInputs > 0) {
            ctrl.wEnable := Vec.fill(pc.numInputs){enable.B}
            ctrl.wAddr.foreach { addrs => addrs := Vec.fill(pc.numInputs){wAddr.U} }
        }

        if (pc.numOutputs > 0) {
            ctrl.rEnable := Vec.fill(pc.numOutputs){enable.B}
            ctrl.rAddr.foreach { addrs => addrs := Vec.fill(pc.numOutputs){rAddr.U} }
            if (pc.numCrossInputs > 0) {
                ctrl.bpSel.foreach { sel =>
                    sel := Vec.fill(pc.numOutputs){
                        Vec.fill(pc.numCrossInputs){enable.B}}
                }
            }
        }
    }

    when (state === 0.U) {
        drive(data.internal, c.intConfig, true, 1, 2)
        drive(data.external, c.extConfig, true, 3, 4)
    } .otherwise {
        drive(data.internal, c.intConfig, false, 5, 6)
        drive(data.external, c.extConfig, false, 7, 8)
    }

    data
}

// The activation and scratch register files reuse the weight register file's
// example decoder unchanged.
val exampleDecodeActvtnRF = exampleDecodeWeightRF
val exampleDecodeScratchRF = exampleDecodeWeightRF

/**
  * Example IPU decode function used to exercise the Decoder.
  *
  * Produces a two-lane select pattern: (false, true) in state 0 and
  * (true, false) in every other state.
  *
  * @param state current state value
  * @param c     IPU configuration; `c.width` must be 2 because the literal
  *              patterns below are exactly two lanes wide
  * @return a wire holding one select bit per lane
  */
def exampleDecodeIPU(state: UInt, c: IPUConfig) = {
    
    // Fail fast with a readable message rather than later, when a two-element
    // Vec would be connected to a wire of a different size.
    require(c.width == 2, "Example IPU decode only supports an IPU width of 2.\n")
    
    val data = Wire(Vec(c.width, Bool()))
    
    when (state === 0.U) {
        data := Vec(false.B :: true.B :: Nil)
    } .otherwise {
        data := Vec(true.B :: false.B :: Nil)
    }
    
    data
}

/**
  * Example ALU decode function used to exercise the Decoder.
  *
  * Enables the identity function in state 0 and the add function in every
  * other state; max and accumulate are never enabled.
  *
  * @param state current state value
  * @param c     ALU configuration used to build the select bundle
  * @return a wire holding the ALU function-select bundle
  */
def exampleDecodeALU(state: UInt, c: ALUConfig) = {
    
    val fSel = Wire(new ALUFSel(c))
    val inStateZero = state === 0.U
    
    // `.get` assumes all four enables are present in the bundle for this config.
    fSel.idnEnable.get := inStateZero
    fSel.addEnable.get := !inStateZero
    fSel.maxEnable.get := false.B
    fSel.accEnable.get := false.B
    
    fSel
}

/**
  * Example NLU decode function used to exercise the Decoder.
  *
  * Enables the identity function in state 0 and ReLU in every other state.
  *
  * @param state current state value
  * @param c     NLU configuration used to build the select bundle
  * @return a wire holding the NLU function-select bundle
  */
def exampleDecodeNLU(state: UInt, c: NLUConfig) = {
    
    val fSel = Wire(new NLUFSel(c))
    val inStateZero = state === 0.U
    
    // `.get` assumes both enables are present in the bundle for this config.
    fSel.idEnable.get := inStateZero
    fSel.reluEnable.get := !inStateZero
    
    fSel
}


[36mexampleDecodeWeightRF[39m: ([32mUInt[39m, [32mRFConfig[39m) => [32mRFControl[39m = <function2>
[36mexampleDecodeActvtnRF[39m: ([32mUInt[39m, [32mRFConfig[39m) => [32mRFControl[39m = <function2>
[36mexampleDecodeScratchRF[39m: ([32mUInt[39m, [32mRFConfig[39m) => [32mRFControl[39m = <function2>
defined [32mfunction[39m [36mexampleDecodeIPU[39m
defined [32mfunction[39m [36mexampleDecodeALU[39m
defined [32mfunction[39m [36mexampleDecodeNLU[39m

#### Verification

In [16]:
// PE configuration used to elaborate the Decoder under test. Each sub-config is
// named before being handed to PEConfig in its positional order.
val examplePEConfigForDecoder = {
    // RFConfig arguments: numIntInputs, numExtInputs, numIntOutputs,
    // numExtOutputs, addrWidth, dataWidth, bpType.
    val weightRFCfg = new RFConfig(1, 1, 2, 1, 4, 8, "Soft")
    val actvtnRFCfg = new RFConfig(1, 1, 2, 1, 4, 8, "Soft")
    val scratchRFCfg = new RFConfig(1, 1, 1, 1, 4, 8, "Soft")
    val ipuCfg = new IPUConfig(2, 8, 8, "Firm")
    val aluCfg = new ALUConfig(8, List("Identity", "Add", "Max", "Accumulate"))
    val nluCfg = new NLUConfig(8, 8, List("Identity", "ReLu"))
    val smCfg = new StateMachineConfig(4, 4, exampleStateMap)
    
    new PEConfig(
        weightRFCfg,
        actvtnRFCfg,
        scratchRFCfg,
        ipuCfg,
        aluCfg,
        nluCfg,
        smCfg,
        exampleDecodeWeightRF,
        exampleDecodeActvtnRF,
        exampleDecodeScratchRF,
        exampleDecodeIPU,
        exampleDecodeALU,
        exampleDecodeNLU
    )
}


// Decoder test: drives the state input to 0 and then 1, and checks every decoded
// control output (port 0 of each register file half, plus the IPU/ALU/NLU selects).
Driver(() => new Decoder(examplePEConfigForDecoder)) {
    
    uut => new PeekPokeTester(uut) {
        
        val memCtrl = uut.io.mem
        val procCtrl = uut.io.proc
        
        // Checks port 0 of the internal half, then the external half, of one
        // register file's control bundle. `enable` is the expected value of the
        // write enable, read enable and bypass select alike.
        def checkRF(rf: RFControl, enable: Int, intW: Int, intR: Int, extW: Int, extR: Int): Unit = {
            val halves = Seq(
                (rf.internal.get, intW, intR),
                (rf.external.get, extW, extR))
            for ((half, wAddr, rAddr) <- halves) {
                expect(half.wEnable(0), enable)
                expect(half.rEnable(0), enable)
                expect(half.wAddr.get(0), wAddr)
                expect(half.rAddr.get(0), rAddr)
                expect(half.bpSel.get(0)(0), enable)
            }
        }
        
        // Checks the two IPU select bits.
        def checkIPU(lane0: Int, lane1: Int): Unit = {
            expect(procCtrl.ipuBpSel.get(0), lane0)
            expect(procCtrl.ipuBpSel.get(1), lane1)
        }
        
        // Checks the four ALU function enables.
        def checkALU(idn: Int, add: Int, max: Int, acc: Int): Unit = {
            expect(procCtrl.aluFSel.idnEnable.get, idn)
            expect(procCtrl.aluFSel.addEnable.get, add)
            expect(procCtrl.aluFSel.maxEnable.get, max)
            expect(procCtrl.aluFSel.accEnable.get, acc)
        }
        
        // Checks the two NLU function enables.
        def checkNLU(id: Int, relu: Int): Unit = {
            expect(procCtrl.nluFSel.idEnable.get, id)
            expect(procCtrl.nluFSel.reluEnable.get, relu)
        }
        
        // State 0: everything enabled, addresses 1-4.
        poke(uut.io.state, 0.U)
        step(1)
        
        checkRF(memCtrl.weightRF, enable = 1, intW = 1, intR = 2, extW = 3, extR = 4)
        checkRF(memCtrl.actvtnRF, enable = 1, intW = 1, intR = 2, extW = 3, extR = 4)
        checkIPU(lane0 = 0, lane1 = 1)
        checkALU(idn = 1, add = 0, max = 0, acc = 0)
        checkRF(memCtrl.scratchRF, enable = 1, intW = 1, intR = 2, extW = 3, extR = 4)
        checkNLU(id = 1, relu = 0)
        
        // State 1: everything disabled, addresses 5-8.
        poke(uut.io.state, 1.U) 
        step(1)
        
        checkRF(memCtrl.weightRF, enable = 0, intW = 5, intR = 6, extW = 7, extR = 8)
        checkRF(memCtrl.actvtnRF, enable = 0, intW = 5, intR = 6, extW = 7, extR = 8)
        checkIPU(lane0 = 1, lane1 = 0)
        checkALU(idn = 0, add = 1, max = 0, acc = 0)
        checkRF(memCtrl.scratchRF, enable = 0, intW = 5, intR = 6, extW = 7, extR = 8)
        checkNLU(id = 0, relu = 1)
        
    }
}


[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.051] Done elaborating.
Total FIRRTL Compile Time: 81.5 ms
Total FIRRTL Compile Time: 43.2 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533145921179
test cmd13WrapperHelperDecoder Success: 76 tests passed in 7 cycles taking 0.028451 seconds
[[35minfo[0m] [0.022] RAN 2 CYCLES PASSED


[36mexamplePEConfigForDecoder[39m: [32mPEConfig[39m = $sess.cmd13Wrapper$Helper$PEConfig@25ee56ff
[36mres15_1[39m: [32mBoolean[39m = [32mtrue[39m

## PE

##### Definition

In [17]:
/**
  * Processing element: a state machine and decoder controlling three register
  * files (weight, activation, scratch) and an IPU -> ALU -> NLU datapath.
  *
  * Dataflow as wired below:
  *  - weight and activation RF internal reads feed the IPU,
  *  - the IPU output feeds the ALU,
  *  - the ALU output is written to scratch RF internal write port 0,
  *  - scratch RF internal read port 0 feeds the ALU (when it has an `rf` input)
  *    and the NLU,
  *  - the NLU output is the PE's `totalOutput`.
  *
  * @param c PE configuration; the scratch RF config must provide at least one
  *          internal input and one internal output
  */
class PE(c: PEConfig) extends Module {
    
    val cw = c.weightRFConfig
    val ca = c.actvtnRFConfig
    val cs = c.scratchRFConfig
    
    // The datapath below indexes port 0 of the scratch RF's internal write and
    // read buses, so both buses must be non-empty.
    require(cs.numIntInputs > 0,
        "Scratch RF must have at least one internal input for the ALU result.\n")
    require(cs.numIntOutputs > 0,
        "Scratch RF must have at least one internal output to feed the ALU and NLU.\n")
    
    val io = IO(new Bundle {
        // Control word forwarded to the state machine.
        val stateCtrl = Input(UInt(c.smConfig.ctrlWidth.W))
        // External write data for each register file.
        val toWeightRF = Input(Vec(cw.numExtInputs, SInt(cw.dataWidth.W))) 
        val toActvtnRF = Input(Vec(ca.numExtInputs, SInt(ca.dataWidth.W)))
        val toScratchRF = Input(Vec(cs.numExtInputs, SInt(cs.dataWidth.W)))
        // External read data from each register file.
        val fromWeightRF = Output(Vec(cw.numExtOutputs, SInt(cw.dataWidth.W)))
        val fromActvtnRF = Output(Vec(ca.numExtOutputs, SInt(ca.dataWidth.W)))
        val fromScratchRF = Output(Vec(cs.numExtOutputs, SInt(cs.dataWidth.W)))
        // Output of the NLU.
        val totalOutput = Output(SInt(c.nluConfig.outBitWidth.W))
    })
    
    // Control path: state machine -> decoder -> per-unit control bundles.
    val stateMachine = Module(new StateMachine(c.smConfig))
    stateMachine.io.control := io.stateCtrl
    
    val decoder = Module(new Decoder(c))
    decoder.io.state := stateMachine.io.out
    
    // NOTE(review): wInternal of the weight and activation RFs is never driven
    // in this class; presumably those configs are expected to have zero
    // internal inputs - confirm.
    val weightRF = Module(new RF(cw))
    weightRF.io.control <> decoder.io.mem.weightRF
    weightRF.io.wExternal := io.toWeightRF
    io.fromWeightRF := weightRF.io.rExternal
    
    val actvtnRF = Module(new RF(ca))
    actvtnRF.io.control <> decoder.io.mem.actvtnRF
    actvtnRF.io.wExternal := io.toActvtnRF
    io.fromActvtnRF := actvtnRF.io.rExternal
       
    val ipu = Module(new IPU(c.ipuConfig))
    // The IPU only has a bypass-select input for some configurations.
    if (ipu.io.bpSel.isDefined) { 
        ipu.io.bpSel.get := decoder.io.proc.ipuBpSel.get 
    }
    ipu.io.weightIn := weightRF.io.rInternal
    ipu.io.actvtnIn := actvtnRF.io.rInternal

    val alu = Module(new ALU(c.aluConfig))
    alu.io.fSel <> decoder.io.proc.aluFSel
    alu.io.ipu <> ipu.io.out
    
    val scratchRF = Module(new RF(cs))
    scratchRF.io.control <> decoder.io.mem.scratchRF
    scratchRF.io.wExternal := io.toScratchRF
    // Only internal write port 0 is driven; guarded by the require above.
    scratchRF.io.wInternal(0) := alu.io.out
    io.fromScratchRF := scratchRF.io.rExternal
    // The ALU only has a register-file operand input for some configurations.
    if(alu.io.rf.isDefined) { 
        alu.io.rf.get := scratchRF.io.rInternal(0)
    }
    
    val nlu = Module(new NLU(c.nluConfig))
    nlu.io.fSel <> decoder.io.proc.nluFSel
    nlu.io.in := scratchRF.io.rInternal(0)
    
    io.totalOutput := nlu.io.out
}

defined [32mclass[39m [36mPE[39m

##### Verification

In [20]:
/*
 * Row stationary 1D convolution
 *
 * (1, 2, 3) * (1, 2, 3, 4, 5) = (14, 20, 26)
 */

// Weight RF control for the row-stationary example.
//  - States 0-2: external write port 0 is enabled and writes to address `state`.
//  - States 1-9: internal read port 0 is enabled and reads address (state - 1) % 3.
// Addresses are DontCare whenever the corresponding enable is low.
val rsDecodeWeightRF = (state: UInt, c: RFConfig) => {
    
    val ctrl = Wire(new RFControl(c))
    val extPort = ctrl.external.get
    val intPort = ctrl.internal.get
    
    // `state` is unsigned, so "0, 1 or 2" is the same as "<= 2".
    val loading = state <= 2.U
    extPort.wEnable(0) := loading
    when (loading) {
        extPort.wAddr.get(0) := state
    } .otherwise {
        extPort.wAddr.get(0) := DontCare
    }
    
    val reading = state >= 1.U && state <= 9.U
    intPort.rEnable(0) := reading
    when (reading) {
        intPort.rAddr.get(0) := (state - 1.U) % 3.U
    } .otherwise {
        intPort.rAddr.get(0) := DontCare
    }

    ctrl
}

// Activation RF control for the row-stationary example.
//  - States 0-8: external write port 0 is enabled and writes to address state % 3.
//  - States 1-9: internal read port 0 is enabled and reads address (state - 1) % 3,
//    i.e. the address written in the previous state.
// Addresses are DontCare whenever the corresponding enable is low.
val rsDecodeActvtnRF = (state: UInt, c: RFConfig) => {
    
    val ctrl = Wire(new RFControl(c))
    val extPort = ctrl.external.get
    val intPort = ctrl.internal.get
    
    val streamingIn = state <= 8.U
    extPort.wEnable(0) := streamingIn
    when (streamingIn) {
        extPort.wAddr.get(0) := state % 3.U
    } .otherwise {
        extPort.wAddr.get(0) := DontCare
    }
    
    val reading = state >= 1.U && state <= 9.U
    intPort.rEnable(0) := reading
    when (reading) {
        intPort.rAddr.get(0) := (state - 1.U) % 3.U
    } .otherwise {
        intPort.rAddr.get(0) := DontCare
    }
    
    ctrl
}

// Scratch RF control for the row-stationary example.
//
// Internal write port 0 is enabled in states 1-9. Internal read port 0 is
// enabled only in the states that define a read address: 2, 3, 5, 6, 8, 9, 10.
// States 1, 4 and 7 are the states in which rsDecodeALU selects identity, and
// their read address is DontCare, so no read is requested there.
//
// Within each group of three states the read and write addresses alternate
// between two locations, so the same address is never read and written in one
// state.
val rsDecodeScratchRF = (state: UInt, c: RFConfig) => {
    
    val ctrl = Wire(new RFControl(c))
    
    when (state >= 1.U && state <= 9.U) {
        ctrl.internal.get.wEnable(0) := true.B
    } .otherwise {
        ctrl.internal.get.wEnable(0) := false.B
    }
    
    // Lower bound is 2, not 1: state 1 has a DontCare read address just like
    // states 4 and 7, so enabling the read there would be inconsistent.
    when (state >= 2.U && state <= 10.U && state =/= 4.U && state =/= 7.U) {
        ctrl.internal.get.rEnable(0) := true.B
    } .otherwise {
        ctrl.internal.get.rEnable(0) := false.B
    }
    
    // Per-state read/write address schedule.
    when (state === 1.U) {
        ctrl.internal.get.rAddr.get(0) := DontCare
        ctrl.internal.get.wAddr.get(0) := 0.U
    } .elsewhen (state === 2.U) {
        ctrl.internal.get.rAddr.get(0) := 0.U
        ctrl.internal.get.wAddr.get(0) := 1.U
    } .elsewhen (state === 3.U) {
        ctrl.internal.get.rAddr.get(0) := 1.U
        ctrl.internal.get.wAddr.get(0) := 0.U
    } .elsewhen (state === 4.U) {
        ctrl.internal.get.rAddr.get(0) := DontCare
        ctrl.internal.get.wAddr.get(0) := 1.U
    } .elsewhen (state === 5.U) {
        ctrl.internal.get.rAddr.get(0) := 1.U
        ctrl.internal.get.wAddr.get(0) := 2.U
    } .elsewhen (state === 6.U) {
        ctrl.internal.get.rAddr.get(0) := 2.U
        ctrl.internal.get.wAddr.get(0) := 1.U
    } .elsewhen (state === 7.U) {
        ctrl.internal.get.rAddr.get(0) := DontCare
        ctrl.internal.get.wAddr.get(0) := 2.U
    } .elsewhen (state === 8.U) {
        ctrl.internal.get.rAddr.get(0) := 2.U
        ctrl.internal.get.wAddr.get(0) := 3.U
    } .elsewhen (state === 9.U) {
        ctrl.internal.get.rAddr.get(0) := 3.U
        ctrl.internal.get.wAddr.get(0) := 2.U
    } .elsewhen (state === 10.U) {
        // Final state: read only, nothing is written.
        ctrl.internal.get.rAddr.get(0) := 2.U
        ctrl.internal.get.wAddr.get(0) := DontCare
    } .otherwise {
        ctrl.internal.get.rAddr.get(0) := DontCare
        ctrl.internal.get.wAddr.get(0) := DontCare
    }
    
    ctrl
}

// IPU decode for the row-stationary example. The schedule does not depend on
// the select bits (rsPEConfig builds the IPU with type "None"), so `state` is
// unused and the value is left as DontCare.
val rsDecodeIPU = (state: UInt, c: IPUConfig) => {
    val bpSel = Wire(Vec(c.width, Bool()))
    // Drive the wire explicitly so it is never returned unconnected.
    bpSel := DontCare
    bpSel
}

// ALU decode for the row-stationary example.
//  - States 1, 4, 7: identity enabled.
//  - States 2, 3, 5, 6, 8, 9: accumulate enabled.
//  - Any other state: both disabled.
val rsDecodeALU = (state: UInt, c: ALUConfig) => {
    val fSel = Wire(new ALUFSel(c)) 
    
    val selectIdentity = state === 1.U || state === 4.U || state === 7.U
    val inAccumulateRange = state > 1.U && state < 10.U
    
    fSel.idnEnable.get := selectIdentity
    // Identity takes priority over accumulate inside the 2-9 range.
    fSel.accEnable.get := !selectIdentity && inAccumulateRange
    
    fSel
}

// NLU decode for the row-stationary example: identity is enabled in every
// state, so `state` is unused.
val rsDecodeNLU = (state: UInt, c: NLUConfig) => {
    val nluSel = Wire(new NLUFSel(c))
    nluSel.idEnable.get := true.B
    nluSel
}

// Next-state function for the row-stationary example: counts up by one while
// control is 1 and the state is below 10; otherwise the state is held.
val rsStateMap = (state: UInt, control: UInt, c: StateMachineConfig) => {
    
    val advance = control === 1.U && state < 10.U
    
    // Routed through a wire of the configured state width, as before.
    val nextState = Wire(UInt(c.stateWidth.W))
    nextState := Mux(advance, state + 1.U, state)
    
    nextState
}

// PE configuration for the row-stationary 1D convolution example.
// NOTE(review): 16 is passed to the RF and IPU configs while 8 is passed to the
// ALU and NLU configs; if these are all data widths they may be inconsistent -
// TODO confirm against the config class definitions.
val rsPEConfig = {
    // RFConfig arguments: numIntInputs, numExtInputs, numIntOutputs,
    // numExtOutputs, addrWidth, dataWidth, bpType.
    // Weight and activation RFs: external write in, internal read out.
    val weightRFCfg = new RFConfig(0, 1, 1, 0, 4, 16, "None")
    val actvtnRFCfg = new RFConfig(0, 1, 1, 0, 4, 16, "None")
    // Scratch RF: internal write in, internal read out.
    val scratchRFCfg = new RFConfig(1, 0, 1, 0, 4, 16, "None")
    val ipuCfg = new IPUConfig(1, 16, 16, "None")
    val aluCfg = new ALUConfig(8, List("Identity", "Accumulate"))
    val nluCfg = new NLUConfig(8, 8, List("Identity"))
    val smCfg = new StateMachineConfig(4, 4, rsStateMap)
    
    new PEConfig(
        weightRFCfg,
        actvtnRFCfg,
        scratchRFCfg,
        ipuCfg,
        aluCfg,
        nluCfg,
        smCfg,
        rsDecodeWeightRF,
        rsDecodeActvtnRF,
        rsDecodeScratchRF,
        rsDecodeIPU,
        rsDecodeALU,
        rsDecodeNLU
    )
}

// Elaborates and compiles the PE with the row-stationary configuration. The
// tester body is empty, so this only shows that the design builds; it contains
// no pokes or expects.
// TODO: drive the inputs and check the outputs against the expected convolution result.
Driver(() => new PE(rsPEConfig)) { pe =>
    new PeekPokeTester(pe) {
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.047] Done elaborating.
Total FIRRTL Compile Time: 164.2 ms
Total FIRRTL Compile Time: 125.6 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533165266381
test cmd16WrapperHelperPE Success: 0 tests passed in 5 cycles taking 0.012225 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED


[36mrsDecodeWeightRF[39m: ([32mUInt[39m, [32mRFConfig[39m) => [32mRFControl[39m = <function2>
[36mrsDecodeActvtnRF[39m: ([32mUInt[39m, [32mRFConfig[39m) => [32mRFControl[39m = <function2>
[36mrsDecodeScratchRF[39m: ([32mUInt[39m, [32mRFConfig[39m) => [32mRFControl[39m = <function2>
[36mrsDecodeIPU[39m: ([32mUInt[39m, [32mIPUConfig[39m) => [32mVec[39m[[32mBool[39m] = <function2>
[36mrsDecodeALU[39m: ([32mUInt[39m, [32mALUConfig[39m) => [32mALUFSel[39m = <function2>
[36mrsDecodeNLU[39m: ([32mUInt[39m, [32mNLUConfig[39m) => [32mNLUFSel[39m = <function2>
[36mrsStateMap[39m: ([32mUInt[39m, [32mUInt[39m, [32mStateMachineConfig[39m) => [32mUInt[39m = <function3>
[36mrsPEConfig[39m: [32mPEConfig[39m = $sess.cmd13Wrapper$Helper$PEConfig@503f5e58
[36mres19_8[39m: [32mBoolean[39m = [32mtrue[39m