# Unified Processing Engine
#### Verification | Version 0.6.1 | Updated 2018.7.30
___

## Setup

In [1]:
// Locate the Ammonite bootstrap script under the JVM working directory and
// load it into the current interpreter session.
val path = Seq(System.getProperty("user.dir"), "source/load-ivy.sc").mkString("/")
interp.load.module(
    ammonite.ops.Path(java.nio.file.FileSystems.getDefault.getPath(path)))

[36mpath[39m: [32mString[39m = [32m"""
C:\Users\RyanL\OneDrive\Research\SEAL\processing-engine/source/load-ivy.sc
"""[39m

In [2]:
import chisel3._
import chisel3.util._
import chisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

import scala.math.pow

[32mimport [39m[36mchisel3._
[39m
[32mimport [39m[36mchisel3.util._
[39m
[32mimport [39m[36mchisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

[39m
[32mimport [39m[36mscala.math.pow[39m

## Register File

##### Definition

In [3]:
/**
 * Static parameters for one side (internal or external) of the register file.
 *
 * @param numInputs      number of write ports on this side
 * @param numOutputs     number of read ports on this side
 * @param numCrossInputs length of each read port's bypass-select vector
 * @param addrWidth      bit width of the read/write address fields
 * @param bpSoft         true when the "Soft" bypass type is selected
 * @param bpFirm         true when the "Firm" bypass type is selected
 */
class PartialRFConfig(
    val numInputs: Int,
    val numOutputs: Int,
    val numCrossInputs: Int,
    val addrWidth: Int,
    val bpSoft: Boolean,
    val bpFirm: Boolean
)

/**
 * Control signals for one side of the register file.
 *
 * Address fields are only generated when Firm bypassing is off, and the
 * bypass-select field is only generated when Soft or Firm bypassing is on.
 */
class PartialRFControl(c: PartialRFConfig) extends Bundle {

    // Explicit cloneType, matching the other parameterized Bundles in this file,
    // so the bundle can be cloned on its own (e.g. inside a Wire or Vec).
    override def cloneType = (new PartialRFControl(c)).asInstanceOf[this.type]

    // One enable bit per write port / read port
    val wEnable = Vec(c.numInputs, Bool())
    val rEnable = Vec(c.numOutputs, Bool())
    // One address per write port / read port; absent under Firm bypassing
    val wAddr = if (!c.bpFirm) Some(Vec(c.numInputs, UInt(c.addrWidth.W))) else None
    val rAddr = if (!c.bpFirm) Some(Vec(c.numOutputs, UInt(c.addrWidth.W))) else None
    // Each output can select which input of the opposite bus to bypass from
    val bpSel = if (c.bpSoft || c.bpFirm) Some(Vec(c.numOutputs, Vec(c.numCrossInputs, Bool()))) else None
}

/**
 * Static parameters of the register file.
 *
 * @param numIntInputs  number of internal write ports
 * @param numExtInputs  number of external write ports
 * @param numIntOutputs number of internal read ports
 * @param numExtOutputs number of external read ports
 * @param addrWidth     address width in bits (must be 0 for "Firm" bypassing)
 * @param dataWidth     data width in bits (must be positive)
 * @param bpType        bypass type: "None", "Soft" or "Firm"
 * @throws IllegalArgumentException if any of the checks below fails
 */
class RFConfig(
        val numIntInputs: Int,
        val numExtInputs: Int,
        val numIntOutputs: Int,
        val numExtOutputs: Int,
        val addrWidth: Int,
        val dataWidth: Int,
        val bpType: String) {
    
    val bpNone = (bpType == "None")
    val bpSoft = (bpType == "Soft")
    val bpFirm = (bpType == "Firm")
    
    require(bpNone || bpSoft || bpFirm, "Invalid Bypass type.\n")
    require(numIntInputs > 0 || numExtInputs > 0, "Must have at least one input.\n")
    require(numIntOutputs > 0 || numExtOutputs > 0, "Must have at least one output.\n")
    require(dataWidth > 0, "Data bitwidth must be at least one.\n") 
    if (bpFirm) { require(addrWidth == 0, "Address width must be 0 when Firm Bypassing.\n") }
    
    // A read port bypasses from the write ports of the opposite bus, so each
    // side's bypass-select width is the opposite side's number of INPUTS
    // (previously the opposite side's number of outputs was passed here).
    val intConfig = new PartialRFConfig(
        numIntInputs, numIntOutputs, numExtInputs, addrWidth, bpSoft, bpFirm)
    
    val extConfig = new PartialRFConfig(
        numExtInputs, numExtOutputs, numIntInputs, addrWidth, bpSoft, bpFirm)
}

/**
 * Complete register-file control bundle: one optional sub-bundle per side.
 * A side that has neither write ports nor read ports gets no sub-bundle.
 */
class RFControl(c: RFConfig) extends Bundle {

    override def cloneType = (new RFControl(c)).asInstanceOf[this.type]

    private val hasInternal = c.numIntInputs > 0 || c.numIntOutputs > 0
    private val hasExternal = c.numExtInputs > 0 || c.numExtOutputs > 0

    val internal = if (hasInternal) Some(new PartialRFControl(c.intConfig)) else None
    val external = if (hasExternal) Some(new PartialRFControl(c.extConfig)) else None
}

/**
 * Register file with an internal and an external group of read/write ports.
 *
 * - No bypass:   reads come from the addressed storage register.
 * - Soft bypass: a read returns the opposite bus's registered write data when
 *                any of its bypass-select bits is set, otherwise the addressed
 *                storage register.
 * - Firm bypass: there is no addressed storage; reads always come from the
 *                opposite bus's registered write data.
 *
 * A disabled read port outputs 0.
 */
class RF(c: RFConfig) extends Module {
    
    val io = IO(new Bundle {
        val control = Input(new RFControl(c))
        val wInternal = Input(Vec(c.numIntInputs, SInt(c.dataWidth.W))) 
        val wExternal = Input(Vec(c.numExtInputs, SInt(c.dataWidth.W)))
        val rInternal = Output(Vec(c.numIntOutputs, SInt(c.dataWidth.W)))
        val rExternal = Output(Vec(c.numExtOutputs, SInt(c.dataWidth.W)))
    })
    
    // Addressed storage (2^addrWidth entries); not generated under Firm bypassing
    val dataRegister = if (!c.bpFirm) 
        Some(RegInit(Vec.fill(pow(2, c.addrWidth).toInt){0.S(c.dataWidth.W)})) else None
    
    // Need to bypass through a register to prevent combinational loops
    val bpAny = c.bpSoft || c.bpFirm
    val bpRegisterInt = if (bpAny && c.numIntInputs > 0)
        Some(RegInit(Vec.fill(c.numIntInputs){0.S(c.dataWidth.W)})) else None
    val bpRegisterExt = if (bpAny && c.numExtInputs > 0)
        Some(RegInit(Vec.fill(c.numExtInputs){0.S(c.dataWidth.W)})) else None
    
    // Connects every write port of one side: an enabled write updates the
    // addressed storage entry (if storage exists) and that side's bypass register.
    private def connectWrites(
            ctrl: Option[PartialRFControl],
            wData: Vec[SInt],
            bpRegister: Option[Vec[SInt]]): Unit = {
        for (i <- 0 until wData.length) {
            val port = ctrl.get
            when (port.wEnable(i)) {
                dataRegister.foreach { regs => regs(port.wAddr.get(i)) := wData(i) }
                bpRegister.foreach { regs => regs(i) := wData(i) }
            }
        }
    }
    
    // Connects every read port of one side. `bpSource` is the bypass register
    // of the OPPOSITE side; it is None when bypassing is off or when the
    // opposite side has no write ports, in which case no bypass path is built
    // (previously that case crashed elaboration with a None.get).
    private def connectReads(
            ctrl: Option[PartialRFControl],
            rData: Vec[SInt],
            bpSource: Option[Vec[SInt]]): Unit = {
        for (i <- 0 until rData.length) {
            val port = ctrl.get
            when (port.rEnable(i)) {
                (c.bpFirm, bpSource) match {
                    case (true, Some(src)) =>
                        rData(i) := PriorityMux(port.bpSel.get(i), src)
                    case (true, None) =>
                        // Firm bypass with nothing to bypass from: no data source exists
                        rData(i) := 0.S
                    case (false, Some(src)) =>
                        // Soft bypass: opposite-bus write data wins when selected
                        when (port.bpSel.get(i).contains(true.B)) {
                            rData(i) := PriorityMux(port.bpSel.get(i), src)
                        } .otherwise {
                            rData(i) := dataRegister.get(port.rAddr.get(i))
                        }
                    case (false, None) =>
                        rData(i) := dataRegister.get(port.rAddr.get(i))
                }
            } .otherwise {
                rData(i) := 0.S
            }
        }
    }
    
    connectWrites(io.control.internal, io.wInternal, bpRegisterInt)
    connectWrites(io.control.external, io.wExternal, bpRegisterExt)
    
    // External write bypasses to Internal read
    connectReads(io.control.internal, io.rInternal, bpRegisterExt)
    // Internal write bypasses to External read
    connectReads(io.control.external, io.rExternal, bpRegisterInt)
}

defined [32mclass[39m [36mPartialRFConfig[39m
defined [32mclass[39m [36mPartialRFControl[39m
defined [32mclass[39m [36mRFConfig[39m
defined [32mclass[39m [36mRFControl[39m
defined [32mclass[39m [36mRF[39m

##### Verification

In [4]:
/*
Basic Test Checklist:
[-] Optional Hardware
    [-] No Internal Read Port
    [-] No External Read Port
    [-] No Internal Write Port
    [-] No External Write Port

[-] No Bypass
    [-] Standard Read/Write
    [-] Port Independence 
    [-] Read Enable
    [-] Write Enable

[-] Soft Bypass
    [-] Standard Read/Write
    [-] Port Independence
    [-] Read Enable
    [-] Write Enable 
    [-] Bypass Enable/Select
    
[-] Firm Bypass
    [-] Bypass Enable/Select

Better would be to check these together.
Even better would be to use Golden Model...
*/

// TODO: Do this.
// Configurations that each remove one port group (no bypassing).
val exRFConfigNoIntWrite = new RFConfig(
    numIntInputs = 0, numExtInputs = 2, numIntOutputs = 2, numExtOutputs = 2,
    addrWidth = 4, dataWidth = 8, bpType = "None")
val exRFConfigNoExtWrite = new RFConfig(
    numIntInputs = 2, numExtInputs = 0, numIntOutputs = 2, numExtOutputs = 2,
    addrWidth = 4, dataWidth = 8, bpType = "None")
val exRFConfigNoIntRead = new RFConfig(
    numIntInputs = 2, numExtInputs = 2, numIntOutputs = 0, numExtOutputs = 2,
    addrWidth = 4, dataWidth = 8, bpType = "None")
val exRFConfigNoExtRead = new RFConfig(
    numIntInputs = 2, numExtInputs = 2, numIntOutputs = 2, numExtOutputs = 0,
    addrWidth = 4, dataWidth = 8, bpType = "None")

// Fully populated configurations, one per bypass type.
val exRFConfigNoBypass = new RFConfig(
    numIntInputs = 2, numExtInputs = 2, numIntOutputs = 2, numExtOutputs = 2,
    addrWidth = 4, dataWidth = 8, bpType = "None")
val exRFConfigSoftBypass = new RFConfig(
    numIntInputs = 2, numExtInputs = 2, numIntOutputs = 2, numExtOutputs = 2,
    addrWidth = 4, dataWidth = 8, bpType = "Soft")
val exRFConfigHardBypass = new RFConfig(
    numIntInputs = 2, numExtInputs = 2, numIntOutputs = 2, numExtOutputs = 2,
    addrWidth = 0, dataWidth = 8, bpType = "Firm")

// Each run below only elaborates the design; the tester bodies are still empty.
val noIntWriteTest = Driver(() => new RF(exRFConfigNoIntWrite)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

val noExtWriteTest = Driver(() => new RF(exRFConfigNoExtWrite)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

val noIntReadTest = Driver(() => new RF(exRFConfigNoIntRead)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

val noExtReadTest = Driver(() => new RF(exRFConfigNoExtRead)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

val noBypassTest = Driver(() => new RF(exRFConfigNoBypass)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

val softBypassTest = Driver(() => new RF(exRFConfigSoftBypass)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

val hardBypassTest = Driver(() => new RF(exRFConfigHardBypass)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}
                          

[[35minfo[0m] [0.001] Elaborating design...
[[35minfo[0m] [0.084] Done elaborating.
Total FIRRTL Compile Time: 853.5 ms
Total FIRRTL Compile Time: 185.7 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.007] SEED 1533059127272
test cmd2WrapperHelperRF Success: 0 tests passed in 5 cycles taking 0.069458 seconds
[[35minfo[0m] [0.009] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.025] Done elaborating.
Total FIRRTL Compile Time: 237.3 ms
Total FIRRTL Compile Time: 176.6 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533059128984
test cmd2WrapperHelperRF Success: 0 tests passed in 5 cycles taking 0.012032 seconds
[[35minfo[0m] [0.001] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.010] Done elaborating.
Total FIRRTL Compile Time: 113.4 ms
Total FIRRTL Compile Time: 86.6 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533059129

[36mexRFConfigNoIntWrite[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@6ef3ce0c
[36mexRFConfigNoExtWrite[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@2a23ddc7
[36mexRFConfigNoIntRead[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@785b78d4
[36mexRFConfigNoExtRead[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@59d97031
[36mexRFConfigNoBypass[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@29aa56a8
[36mexRFConfigSoftBypass[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@1d536bbf
[36mexRFConfigHardBypass[39m: [32mRFConfig[39m = $sess.cmd2Wrapper$Helper$RFConfig@3eed3d19
[36mnoIntWriteTest[39m: [32mBoolean[39m = [32mtrue[39m
[36mnoExtWriteTest[39m: [32mBoolean[39m = [32mtrue[39m
[36mnoIntReadTest[39m: [32mBoolean[39m = [32mtrue[39m
[36mnoExtReadTest[39m: [32mBoolean[39m = [32mtrue[39m
[36mnoBypassTest[39m: [32mBoolean[39m = [32mtrue[39m
[36msoftBypassTest

## Inner Product Unit

##### Definition

In [47]:
/**
 * Static parameters of the inner product unit.
 *
 * @param width       number of weight/activation lanes (at least 1)
 * @param inBitWidth  bit width of each input element (positive)
 * @param outBitWidth bit width of the outputs (positive)
 * @param bpType      bypass type: "None" or "Firm"
 * @throws IllegalArgumentException if any parameter is out of range
 */
class IPUConfig(val width: Int, val inBitWidth: Int, val outBitWidth: Int, val bpType: String) {

    require(width > 0, "Width must be at least one.\n")
    require(bpType == "None" || bpType == "Firm", "Bypass must be \"None\" or \"Firm\".\n")
    require(Seq(inBitWidth, outBitWidth).forall(_ > 0), "Data bitwidth must be greater than 0\n")

    val bpFirm = bpType == "Firm"
}

/**
 * Output bundle of the inner product unit: the inner product itself plus,
 * when `bp` is true, one bypassed weight and one bypassed activation.
 */
class IPUOutput(outBitWidth: Int, bp: Boolean) extends Bundle {

    override def cloneType = (new IPUOutput(outBitWidth, bp)).asInstanceOf[this.type]

    val innerProd = SInt(outBitWidth.W)
    // Bypass fields use the output bit width too, for consistency
    val bpWeight = bp match {
        case true  => Some(SInt(outBitWidth.W))
        case false => None
    }
    val bpActvtn = bp match {
        case true  => Some(SInt(outBitWidth.W))
        case false => None
    }
}


/**
 * Inner product unit: multiplies the weight and activation vectors lane by
 * lane and sums the products. With Firm bypassing it additionally forwards
 * one weight and one activation chosen by the priority-encoded `bpSel`.
 */
class IPU(c: IPUConfig) extends Module {

    val io = IO(new Bundle {
        val bpSel = if (c.bpFirm) Some(Input(Vec(c.width, Bool()))) else None
        val weightIn = Input(Vec(c.width, SInt(c.inBitWidth.W)))
        val actvtnIn = Input(Vec(c.width, SInt(c.inBitWidth.W)))
        val out = Output(new IPUOutput(c.outBitWidth, c.bpFirm))
    })

    // Lane-wise multiplier
    private class PMult extends Module {
        val io = IO(new Bundle {
            val weightVec = Input(Vec(c.width, SInt(c.inBitWidth.W)))
            val actvtnVec = Input(Vec(c.width, SInt(c.inBitWidth.W)))
            val pairwiseProd = Output(Vec(c.width, SInt(c.outBitWidth.W)))
        })
        io.pairwiseProd := io.weightVec.zip(io.actvtnVec).map { case (w, a) => w * a }
    }

    // Sums all lanes with a balanced tree of adders
    private class SumTree extends Module {
        val io = IO(new Bundle {
            val inVec = Input(Vec(c.width, SInt(c.outBitWidth.W)))
            val sum = Output(SInt(c.outBitWidth.W))
        })

        // Combines neighbouring elements pairwise, level by level, until a
        // single value is left; an unpaired last element is carried up as is.
        private def adjReduce[A](xs: List[A], op: (A, A) => A): A = xs match {
            case only :: Nil => only
            case _ => adjReduce(xs.grouped(2).map(_.reduce(op)).toList, op)
        }

        io.sum := adjReduce(io.inVec.toList, (lhs: SInt, rhs: SInt) => lhs + rhs)
    }

    private val pMult = Module(new PMult)
    pMult.io.weightVec := io.weightIn
    pMult.io.actvtnVec := io.actvtnIn

    private val sumTree = Module(new SumTree)
    sumTree.io.inVec := pMult.io.pairwiseProd

    io.out.innerProd := sumTree.io.sum

    // The bypass outputs exist only under Firm bypassing
    io.out.bpWeight.foreach { w => w := PriorityMux(io.bpSel.get, io.weightIn) }
    io.out.bpActvtn.foreach { a => a := PriorityMux(io.bpSel.get, io.actvtnIn) }
}

defined [32mclass[39m [36mIPUConfig[39m
defined [32mclass[39m [36mIPUOutput[39m
defined [32mclass[39m [36mIPU[39m

#### Verification

In [51]:
/*
Basic Test Checklist:
[-] Inner Products

[-] None Bypass

[-] Firm Bypass
    [-] Bypass Select

[-] Differing bitwidths
    [-] Small -> Large
    [-] Large -> Small
    [-] Bypass bitwidth

Better would be to check these together.
Even better would be to use Golden Model...
*/

// Arguments are (width, inBitWidth, outBitWidth, bpType).
val exIPUConfigNoBypass = new IPUConfig(width = 4, inBitWidth = 8, outBitWidth = 8, bpType = "None")
val exIPUConfigFirmBypass = new IPUConfig(width = 4, inBitWidth = 8, outBitWidth = 8, bpType = "Firm")
val exIPUConfigDiffBW1 = new IPUConfig(width = 4, inBitWidth = 4, outBitWidth = 8, bpType = "None")
val exIPUConfigDiffBW2 = new IPUConfig(width = 4, inBitWidth = 8, outBitWidth = 4, bpType = "None")

// Each run below only elaborates the design; the tester bodies are still empty.
Driver(() => new IPU(exIPUConfigNoBypass)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

Driver(() => new IPU(exIPUConfigFirmBypass)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

Driver(() => new IPU(exIPUConfigDiffBW1)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

Driver(() => new IPU(exIPUConfigDiffBW2)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.002] Done elaborating.
Total FIRRTL Compile Time: 13.0 ms
Total FIRRTL Compile Time: 10.8 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533064849530
test cmd46WrapperHelperIPU Success: 0 tests passed in 5 cycles taking 0.002218 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.003] Done elaborating.
Total FIRRTL Compile Time: 14.8 ms
Total FIRRTL Compile Time: 9.0 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533064849568
test cmd46WrapperHelperIPU Success: 0 tests passed in 5 cycles taking 0.001506 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.002] Done elaborating.
Total FIRRTL Compile Time: 11.6 ms
Total FIRRTL Compile Time: 10.0 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 153306484960

[36mexIPUConfigNoBypass[39m: [32mIPUConfig[39m = $sess.cmd46Wrapper$Helper$IPUConfig@4aaf6e38
[36mexIPUConfigFirmBypass[39m: [32mIPUConfig[39m = $sess.cmd46Wrapper$Helper$IPUConfig@783c2da5
[36mexIPUConfigDiffBW1[39m: [32mIPUConfig[39m = $sess.cmd46Wrapper$Helper$IPUConfig@95505d6
[36mexIPUConfigDiffBW2[39m: [32mIPUConfig[39m = $sess.cmd46Wrapper$Helper$IPUConfig@5baae44b
[36mres50_4[39m: [32mBoolean[39m = [32mtrue[39m
[36mres50_5[39m: [32mBoolean[39m = [32mtrue[39m
[36mres50_6[39m: [32mBoolean[39m = [32mtrue[39m
[36mres50_7[39m: [32mBoolean[39m = [32mtrue[39m

## ALU

#### Definition

In [55]:
/**
 * Static parameters of the ALU.
 *
 * @param dataWidth bit width of the ALU data ports
 * @param funcs     non-empty list of supported functions, each one of
 *                  "Identity", "Add", "Max" or "Accumulate"
 * @throws IllegalArgumentException if `funcs` is empty or names an unknown function
 */
class ALUConfig(val dataWidth: Int, val funcs: List[String]) {

    require(funcs.nonEmpty, "Must support at least one function.")
    funcs.foreach { f =>
        require(Set("Identity", "Add", "Max", "Accumulate").contains(f), "Unsupported function.")
    }

    val idnSupp = funcs.contains("Identity")
    val addSupp = funcs.contains("Add")
    val maxSupp = funcs.contains("Max")
    val accSupp = funcs.contains("Accumulate")
    // Add and Max are the functions that consume the bypassed IPU operands
    val addBypassIn = addSupp || maxSupp
}

/** Function-select bundle of the ALU: one enable bit per supported function. */
class ALUFSel(c: ALUConfig) extends Bundle {

    override def cloneType = (new ALUFSel(c)).asInstanceOf[this.type]

    // Priority is given from top to bottom
    val idnEnable = if (!c.idnSupp) None else Some(Bool())
    val addEnable = if (!c.addSupp) None else Some(Bool())
    val maxEnable = if (!c.maxSupp) None else Some(Bool())
    val accEnable = if (!c.accSupp) None else Some(Bool())
}

/**
 * ALU that applies the first enabled function, in the order
 * Identity, Add, Max, Accumulate; the output is 0 when none is enabled.
 */
class ALU(c: ALUConfig) extends Module {

    val io = IO(new Bundle {
        val fSel = Input(new ALUFSel(c))
        val ipu = Input(new IPUOutput(c.dataWidth, c.addBypassIn))
        val rf = if (c.accSupp) Some(Input(SInt(c.dataWidth.W))) else None
        val out = Output(SInt(c.dataWidth.W))
    })

    // Optional ports that were not generated read as constants. The operand
    // fallbacks are logically unnecessary, but Chisel can't infer that.
    private def enabled(sel: Option[Bool]): Bool = sel.getOrElse(false.B)
    private def orZero(operand: Option[SInt]): SInt = operand.getOrElse(0.S)

    when (enabled(io.fSel.idnEnable)) {
        io.out := io.ipu.innerProd
    } .elsewhen (enabled(io.fSel.addEnable)) {
        io.out := orZero(io.ipu.bpWeight) + orZero(io.ipu.bpActvtn)
    } .elsewhen (enabled(io.fSel.maxEnable)) {
        when (orZero(io.ipu.bpWeight) > orZero(io.ipu.bpActvtn)) {
            io.out := orZero(io.ipu.bpWeight)
        } .otherwise {
            io.out := orZero(io.ipu.bpActvtn)
        }
    } .elsewhen (enabled(io.fSel.accEnable)) {
        io.out := io.ipu.innerProd + orZero(io.rf)
    } .otherwise {
        io.out := 0.S
    }
}

defined [32mclass[39m [36mALUConfig[39m
defined [32mclass[39m [36mALUFSel[39m
defined [32mclass[39m [36mALU[39m

#### Verification

In [54]:
/*
Basic Test Checklist:
[-] Alone
    [-] Identity
    [-] Add
    [-] Max
    [-] Accumulate

[-] All functions together
    [-] Identity
    [-] Add
    [-] Max
    [-] Accumulate
    [-] Priority

Better would be to check these together.
Even better would be to use Golden Model...
*/

// One configuration per single function, plus one with every function.
val exALUConfigIdn = new ALUConfig(dataWidth = 8, funcs = List("Identity"))
val exALUConfigAdd = new ALUConfig(dataWidth = 8, funcs = List("Add"))
val exALUConfigMax = new ALUConfig(dataWidth = 8, funcs = List("Max"))
val exALUConfigAcc = new ALUConfig(dataWidth = 8, funcs = List("Accumulate"))
val exALUConfigAll = new ALUConfig(
    dataWidth = 8, funcs = List("Identity", "Add", "Max", "Accumulate"))

// Each run below only elaborates the design; the tester bodies are still empty.
Driver(() => new ALU(exALUConfigIdn)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

Driver(() => new ALU(exALUConfigAdd)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

Driver(() => new ALU(exALUConfigMax)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

Driver(() => new ALU(exALUConfigAcc)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

Driver(() => new ALU(exALUConfigAll)) { dut =>
    new PeekPokeTester(dut) {
        // TODO
    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.009] Done elaborating.
Total FIRRTL Compile Time: 8.1 ms
Total FIRRTL Compile Time: 5.2 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533064879839
test cmd52WrapperHelperALU Success: 0 tests passed in 5 cycles taking 0.001036 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.002] Done elaborating.
Total FIRRTL Compile Time: 5.8 ms
Total FIRRTL Compile Time: 6.0 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533064879875
test cmd52WrapperHelperALU Success: 0 tests passed in 5 cycles taking 0.000630 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.001] Done elaborating.
Total FIRRTL Compile Time: 6.6 ms
Total FIRRTL Compile Time: 4.4 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533064879898
tes

[36mexALUConfigIdn[39m: [32mALUConfig[39m = $sess.cmd52Wrapper$Helper$ALUConfig@47edf8bc
[36mexALUConfigAdd[39m: [32mALUConfig[39m = $sess.cmd52Wrapper$Helper$ALUConfig@461dc581
[36mexALUConfigMax[39m: [32mALUConfig[39m = $sess.cmd52Wrapper$Helper$ALUConfig@2b9cee82
[36mexALUConfigAcc[39m: [32mALUConfig[39m = $sess.cmd52Wrapper$Helper$ALUConfig@5ce1d2f4
[36mexALUConfigAll[39m: [32mALUConfig[39m = $sess.cmd52Wrapper$Helper$ALUConfig@7461e9cc
[36mres53_5[39m: [32mBoolean[39m = [32mtrue[39m
[36mres53_6[39m: [32mBoolean[39m = [32mtrue[39m
[36mres53_7[39m: [32mBoolean[39m = [32mtrue[39m
[36mres53_8[39m: [32mBoolean[39m = [32mtrue[39m
[36mres53_9[39m: [32mBoolean[39m = [32mtrue[39m

## Nonlinear Unit

In [61]:
/**
 * Static parameters of the nonlinear unit.
 *
 * @param inBitWidth  bit width of the input
 * @param outBitWidth bit width of the output
 * @param funcs       supported functions, each one of "Identity" or "ReLu"
 * @throws IllegalArgumentException if `funcs` names any other function
 */
class NLUConfig(val inBitWidth: Int, val outBitWidth: Int, val funcs: List[String]) {

    funcs.foreach { f =>
        require(f == "Identity" || f == "ReLu", "Unsupported Function")
    }

    val idSupp = funcs.exists(_ == "Identity")
    val reluSupp = funcs.exists(_ == "ReLu")
    // Not selectable yet
    val tanhSupp = false //funcs.contains("tanh")
    val sinhSupp = false //funcs.contains("sinh")
}

/** Function-select bundle of the nonlinear unit: one enable bit per supported function. */
class NLUFSel(c: NLUConfig) extends Bundle {

    override def cloneType = (new NLUFSel(c)).asInstanceOf[this.type]

    val idEnable = if (!c.idSupp) None else Some(Bool())
    val reluEnable = if (!c.reluSupp) None else Some(Bool())
    val tanhEnable = if (!c.tanhSupp) None else Some(Bool())
    val sinhEnable = if (!c.sinhSupp) None else Some(Bool())
}

/**
 * Nonlinear unit that applies the first enabled function, in the order
 * Identity, ReLu, tanh, sinh; the output is 0 when none is enabled.
 * tanh and sinh are placeholders that currently output 0.
 */
class NLU(c: NLUConfig) extends Module {
    
    val io = IO(new Bundle {
        val fSel = Input(new NLUFSel(c))
        val in = Input(SInt(c.inBitWidth.W))
        val out = Output(SInt(c.outBitWidth.W))
    })
    
    when (io.fSel.idEnable.getOrElse(false.B)) {
        io.out := io.in
    } .elsewhen (io.fSel.reluEnable.getOrElse(false.B)) {
        // `io.in` is a plain SInt port, so it is read directly here, exactly
        // as the Identity branch does (the former `io.in.data` had no such field).
        when (io.in > 0.S) {
            io.out := io.in
        } .otherwise {
            io.out := 0.S
        }
    } .elsewhen (io.fSel.tanhEnable.getOrElse(false.B)) {
        // TODO
        io.out := 0.S
    } .elsewhen (io.fSel.sinhEnable.getOrElse(false.B)) {
        // TODO
        io.out := 0.S
    } .otherwise {
        io.out := 0.S
    }
}

defined [32mclass[39m [36mNLUConfig[39m
defined [32mclass[39m [36mNLUFSel[39m
defined [32mclass[39m [36mNLU[39m

In [62]:
/*
Basic Test Checklist:
[ ] Alone
    [ ] Identity
    [ ] ReLu
    [ ] tanh
    [ ] sinh

[ ] All functions together
    [ ] Identity works
    [ ] ReLu works
    [ ] tanh works
    [ ] sinh works
    [ ] Correct Priority

Better would be to check these together.
Even better would be to use Golden Model...
*/

// Arguments are (inBitWidth, outBitWidth, funcs).
val exNLUConfigId = new NLUConfig(inBitWidth = 8, outBitWidth = 8, funcs = List("Identity"))
val exNLUConfigReLu = new NLUConfig(inBitWidth = 8, outBitWidth = 8, funcs = List("ReLu"))
val exNLUConfigIdReLu = new NLUConfig(
    inBitWidth = 8, outBitWidth = 8, funcs = List("Identity", "ReLu"))

// Each run below only elaborates the design; the tester bodies are still empty.
Driver(() => new NLU(exNLUConfigId)) { dut =>
    new PeekPokeTester(dut) {

    }
}

Driver(() => new NLU(exNLUConfigReLu)) { dut =>
    new PeekPokeTester(dut) {

    }
}

Driver(() => new NLU(exNLUConfigIdReLu)) { dut =>
    new PeekPokeTester(dut) {

    }
}

[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.005] Done elaborating.
Total FIRRTL Compile Time: 5.0 ms
Total FIRRTL Compile Time: 3.6 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533067218060
test cmd60WrapperHelperNLU Success: 0 tests passed in 5 cycles taking 0.000659 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.001] Done elaborating.
Total FIRRTL Compile Time: 4.6 ms
Total FIRRTL Compile Time: 4.0 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533067218083
test cmd60WrapperHelperNLU Success: 0 tests passed in 5 cycles taking 0.000660 seconds
[[35minfo[0m] [0.000] RAN 0 CYCLES PASSED
[[35minfo[0m] [0.000] Elaborating design...
[[35minfo[0m] [0.001] Done elaborating.
Total FIRRTL Compile Time: 4.1 ms
Total FIRRTL Compile Time: 4.2 ms
End of dependency graph
Circuit state created
[[35minfo[0m] [0.000] SEED 1533067218101
tes

[36mexNLUConfigId[39m: [32mNLUConfig[39m = $sess.cmd60Wrapper$Helper$NLUConfig@6b6bf948
[36mexNLUConfigReLu[39m: [32mNLUConfig[39m = $sess.cmd60Wrapper$Helper$NLUConfig@27aeddf3
[36mexNLUConfigIdReLu[39m: [32mNLUConfig[39m = $sess.cmd60Wrapper$Helper$NLUConfig@4c9b7d05
[36mres61_3[39m: [32mBoolean[39m = [32mtrue[39m
[36mres61_4[39m: [32mBoolean[39m = [32mtrue[39m
[36mres61_5[39m: [32mBoolean[39m = [32mtrue[39m

## Control

### State Machine

#### Definition

In [63]:
/**
 * Static parameters of the state machine.
 *
 * @param numStates   number of states; determines `stateWidth`
 * @param numCtrlSigs number of control values; determines `ctrlWidth`
 * @param stateMap    next-state function, called as
 *                    stateMap(current state, control input, this config)
 */
class StateMachineConfig(
    val numStates: Int,
    val numCtrlSigs: Int,
    val stateMap: (UInt, UInt, StateMachineConfig) => UInt
) {

    val stateWidth = log2Up(numStates)
    val ctrlWidth = log2Up(numCtrlSigs)
}

/**
 * State register whose next value is computed by the configured `stateMap`
 * from the current state and the control input. The state resets to 0 and
 * is driven out unchanged on `io.out`.
 */
class StateMachine(c: StateMachineConfig) extends Module {

    val io = IO(new Bundle {
        val control = Input(UInt(c.ctrlWidth.W))
        val out = Output(UInt(c.stateWidth.W))
    })

    val register = RegInit(0.U(c.stateWidth.W))

    // Expose the current state
    io.out := register

    // Next-state logic is supplied by the configuration
    register := c.stateMap(register, io.control, c)
}

defined [32mclass[39m [36mStateMachineConfig[39m
defined [32mclass[39m [36mStateMachine[39m

#### Example

In [None]:
/**
 * Example next-state function for a two-state machine: from state 0 or 1 the
 * next state equals the control input (0 or 1); every other combination
 * goes to state 0.
 */
def exampleStateMap(state: UInt, control: UInt, c: StateMachineConfig): UInt = {

    val nextState = Wire(UInt(c.stateWidth.W))

    val inState0 = state === 0.U
    val inState1 = state === 1.U
    val ctrlIs0 = control === 0.U
    val ctrlIs1 = control === 1.U

    when (inState0 && ctrlIs0) {
        nextState := 0.U
    } .elsewhen (inState0 && ctrlIs1) {
        nextState := 1.U
    } .elsewhen (inState1 && ctrlIs0) {
        nextState := 0.U
    } .elsewhen (inState1 && ctrlIs1) {
        nextState := 1.U
    } .otherwise {
        nextState := 0.U
    }

    nextState
}

#### Verification

In [None]:
val exampleStateMachineConfig = new StateMachineConfig(2, 2, exampleStateMap)

Driver(() => new StateMachine(exampleStateMachineConfig)) { dut =>
    new PeekPokeTester(dut) {
        // Before any clock step the state register holds its reset value
        poke(dut.io.control, 0)
        expect(dut.io.out, 0)

        // (control input, state expected after one step):
        // 0 -> 1, 1 -> 1, 1 -> 0, 0 -> 0
        for ((control, expected) <- Seq((1, 1), (1, 1), (0, 0), (0, 0))) {
            poke(dut.io.control, control)
            step(1)
            expect(dut.io.out, expected)
        }
    }
}

### Decoder

#### Definition

In [None]:
/**
 * Top-level processing-engine configuration: the configuration of every
 * sub-unit plus one decode function per unit. Each decode function is called
 * with the current state and that unit's configuration and returns the
 * control signals for the unit.
 */
class PEConfig(
    // Sub-unit configurations
    val weightRFConfig: RFConfig,
    val actvtnRFConfig: RFConfig,
    val scratchRFConfig: RFConfig,
    val ipuConfig: IPUConfig,
    val aluConfig: ALUConfig,
    val nluConfig: NLUConfig,
    val smConfig: StateMachineConfig,
    // Decode functions: (state, unit configuration) => control signals
    val decodeWeightRF: (UInt, RFConfig) => Data,
    val decodeActvtnRF: (UInt, RFConfig) => Data,
    val decodeScratchRF: (UInt, RFConfig) => Data,
    val decodeIPU: (UInt, IPUConfig) => Data,
    val decodeALU: (UInt, ALUConfig) => Data,
    val decodeNLU: (UInt, NLUConfig) => Data
)

/** Control bundle for the three register files: weight, activation and scratch. */
class MemoryControl(c: PEConfig) extends Bundle {

    override def cloneType = (new MemoryControl(c)).asInstanceOf[this.type]

    val weightRF  = new RFControl(c.weightRFConfig)
    val actvtnRF  = new RFControl(c.actvtnRFConfig)
    val scratchRF = new RFControl(c.scratchRFConfig)
}

/**
 * Control bundle for the processing units: one select bit per configured ALU
 * and NLU function and, when the IPU uses Firm bypassing, one IPU bypass
 * select bit per internal read port of the weight register file.
 */
class ProcessControl(c: PEConfig) extends Bundle {

    override def cloneType = (new ProcessControl(c)).asInstanceOf[this.type]

    val aluFSel = Output(Vec(c.aluConfig.funcs.length, Bool()))
    val nluFSel = Output(Vec(c.nluConfig.funcs.length, Bool()))

    val ipuBpSel =
        if (c.ipuConfig.bpFirm) Some(Output(Vec(c.weightRFConfig.numIntOutputs, Bool())))
        else None
}

/**
 * Turns the current state into control signals by calling the decode
 * functions stored in the configuration and wiring their results to the
 * memory and processing control outputs.
 */
class Decoder(c: PEConfig) extends Module {

    val io = IO(new Bundle {
        val state = Input(UInt(c.smConfig.stateWidth.W))
        val mem = Output(new MemoryControl(c))
        val proc = Output(new ProcessControl(c))
    })

    // Register-file control
    io.mem.weightRF <> c.decodeWeightRF(io.state, c.weightRFConfig)
    io.mem.actvtnRF <> c.decodeActvtnRF(io.state, c.actvtnRFConfig)
    io.mem.scratchRF <> c.decodeScratchRF(io.state, c.scratchRFConfig)

    // The IPU bypass select only exists under Firm bypassing
    io.proc.ipuBpSel.foreach { sel => sel := c.decodeIPU(io.state, c.ipuConfig) }

    // Function selects
    io.proc.aluFSel := c.decodeALU(io.state, c.aluConfig)
    io.proc.nluFSel := c.decodeNLU(io.state, c.nluConfig)
}

#### Example

In [None]:
// Example decode function for the weight register files: state 0 enables
// reads and writes with addresses 1/2/3, every other state disables them
// with addresses 4/5/6.
// NOTE(review): PRFConfig / PRFControl are not defined in the visible part of
// this notebook -- confirm they still exist.
def exampleDecodeWeightPRF(state: UInt, c: PRFConfig) = {

    val data = Wire(new PRFControl(c))

    // Drives every register file in the bundle with one set of constants
    def driveAll(enable: Boolean, wAddr: Int, rAddrInt: Int, rAddrExt: Int): Unit =
        data.rf.foreach { k =>
            k.wEnable   := enable.B
            k.rEnable   := enable.B
            k.wAddr     := wAddr.U
            k.rAddrInt  := rAddrInt.U
            k.rAddrExt  := rAddrExt.U
            if (k.bpSel.isDefined) { k.bpSel.get := enable.B }
        }

    when (state === 0.U) {
        driveAll(enable = true, wAddr = 1, rAddrInt = 2, rAddrExt = 3)
    } .otherwise {
        driveAll(enable = false, wAddr = 4, rAddrInt = 5, rAddrExt = 6)
    }

    data
}

// Example decode function for the activation register files: state 0 enables
// reads and writes with addresses 1/2/3, every other state disables them
// with addresses 4/5/6.
// NOTE(review): PRFConfig / PRFControl are not defined in the visible part of
// this notebook -- confirm they still exist.
def exampleDecodeActvtnPRF(state: UInt, c: PRFConfig) = {

    val data = Wire(new PRFControl(c))

    // Drives every register file in the bundle with one set of constants
    def driveAll(enable: Boolean, wAddr: Int, rAddrInt: Int, rAddrExt: Int): Unit =
        data.rf.foreach { k =>
            k.wEnable   := enable.B
            k.rEnable   := enable.B
            k.wAddr     := wAddr.U
            k.rAddrInt  := rAddrInt.U
            k.rAddrExt  := rAddrExt.U
            if (k.bpSel.isDefined) { k.bpSel.get := enable.B }
        }

    when (state === 0.U) {
        driveAll(enable = true, wAddr = 1, rAddrInt = 2, rAddrExt = 3)
    } .otherwise {
        driveAll(enable = false, wAddr = 4, rAddrInt = 5, rAddrExt = 6)
    }

    data
}

// Example decode function for the scratch register file: state 0 enables
// reads and writes with addresses 1/2/3, every other state disables them
// with addresses 4/5/6.
// NOTE(review): the RFControl defined earlier in this notebook exposes
// `internal` / `external` sub-bundles, not the flat fields used below -- this
// example looks out of date; confirm before running.
def exampleDecodeScratchRF(state: UInt, c: RFConfig) = {

    val data = Wire(new RFControl(c))

    // Drives all control fields with one set of constants; the input select
    // is (true, false) when enabled and (false, true) otherwise.
    def drive(enable: Boolean, wAddr: Int, rAddrInt: Int, rAddrExt: Int): Unit = {
        data.wEnable   := enable.B
        data.rEnable   := enable.B
        data.wAddr     := wAddr.U
        data.rAddrInt  := rAddrInt.U
        data.rAddrExt  := rAddrExt.U
        if (data.bpSel.isDefined) { data.bpSel.get := enable.B }
        if (data.inSel.isDefined) { data.inSel.get := Vec(List(enable.B, (!enable).B)) }
    }

    when (state === 0.U) {
        drive(enable = true, wAddr = 1, rAddrInt = 2, rAddrExt = 3)
    } .otherwise {
        drive(enable = false, wAddr = 4, rAddrInt = 5, rAddrExt = 6)
    }

    data
}

// Example decode function for the IPU bypass select: state 0 selects lane 0,
// every other state selects lane 1.
def exampleDecodeIPU(state: UInt, c: IPUConfig) = {
    
    val data = Wire(Vec(c.width, Bool()))
    
    // One-hot pattern sized to the IPU width, so the connection below matches
    // `data` for any width (the former literals were fixed at two elements).
    // With a single lane, oneHot(1) is all-false.
    def oneHot(hot: Int) = Vec(Seq.tabulate(c.width)(i => (i == hot).B))
    
    when (state === 0.U) {
        data := oneHot(0)
    } .otherwise {
        data := oneHot(1)
    }
    
    data
}

// Example decode function for the ALU function select: state 0 enables the
// first configured function, every other state enables the second.
def exampleDecodeALU(state: UInt, c: ALUConfig) = {
    
    // ALUConfig exposes `funcs`, not `numFuncs`; size the select vector the
    // same way ProcessControl does.
    val numFuncs = c.funcs.length
    val data = Wire(Vec(numFuncs, Bool()))
    
    // One-hot pattern sized to the number of functions (the former literals
    // were fixed at four elements). With one function, oneHot(1) is all-false.
    def oneHot(hot: Int) = Vec(Seq.tabulate(numFuncs)(i => (i == hot).B))
    
    when (state === 0.U) {
        data := oneHot(0)
    } .otherwise {
        data := oneHot(1)
    }
    
    data
}

// Example decode function for the NLU function select: state 0 enables the
// first configured function, every other state enables the second.
def exampleDecodeNLU(state: UInt, c: NLUConfig) = {
    
    // NLUConfig exposes `funcs`, not `numFuncs`; size the select vector the
    // same way ProcessControl does.
    // NOTE(review): NLUConfig allows an empty `funcs`; this example assumes at
    // least one function is configured.
    val numFuncs = c.funcs.length
    val data = Wire(Vec(numFuncs, Bool()))
    
    // One-hot pattern sized to the number of functions (the former literals
    // were fixed at two elements). With one function, oneHot(1) is all-false.
    def oneHot(hot: Int) = Vec(Seq.tabulate(numFuncs)(i => (i == hot).B))
    
    when (state === 0.U) {
        data := oneHot(0)
    } .otherwise {
        data := oneHot(1)
    }
    
    data
}


#### Verification

In [None]:
// TODO: require IPU width == weightPRF width == actvtnPRF width
// TODO: require IPUConfig "Firm" if ALUConfig "Add" or "Max"

// Example PE configuration used to elaborate the Decoder in the test that follows.
// The sub-configurations are named locally inside the block so that only
// `examplePEConfig` is introduced at the top level.
val examplePEConfig = {
    // Memory configurations (two separate PRFConfig instances with equal arguments)
    val weightPRFConfig = new PRFConfig(2, 8, 4, 1, "Soft")
    val actvtnPRFConfig = new PRFConfig(2, 8, 4, 1, "Soft")
    // NOTE(review): the RFConfig class at the top of this notebook takes seven
    // parameters; confirm this four-argument call still matches the constructor.
    val scratchRFConfig = new RFConfig(8, 4, 2, true)

    // Processing configurations
    val ipuConfig = new IPUConfig(2, 8, "Firm")
    val aluConfig = new ALUConfig(8, List("Identity", "Add", "Max", "Accumulate"))
    val nluConfig = new NLUConfig(8, List("Identity", "ReLu"))

    // State machine configuration
    val smConfig = new StateMachineConfig(4, 4, exampleStateMap)

    new PEConfig(
        weightPRFConfig,
        actvtnPRFConfig,
        scratchRFConfig,
        ipuConfig,
        aluConfig,
        nluConfig,
        smConfig,
        exampleDecodeWeightPRF,
        exampleDecodeActvtnPRF,
        exampleDecodeScratchRF,
        exampleDecodeIPU,
        exampleDecodeALU,
        exampleDecodeNLU
    )
}


// Decoder test: elaborates a Decoder from examplePEConfig, drives io.state with
// 0 and then 1, and after one step in each case checks every decoded control
// output against the literal patterns written by the exampleDecode* functions.
//
// NOTE(review): exampleDecodeScratchRF drives RFControl fields directly (no
// `.rf` member is used there); confirm that `scratchRF.rf(0)` below matches the
// Decoder's actual scratch RF control bundle.
Driver(() => new Decoder(examplePEConfig)) {

    uut => new PeekPokeTester(uut) {

        // ---- state 0: expect the first pattern of every decode function ----
        poke(uut.io.state, 0.U)
        step(1)

        expect(uut.io.mem.weightPRF.rf(0).wEnable, true.B)
        expect(uut.io.mem.weightPRF.rf(0).rEnable, true.B)
        expect(uut.io.mem.weightPRF.rf(0).wAddr, 1.U)
        expect(uut.io.mem.weightPRF.rf(0).rAddrInt, 2.U)
        expect(uut.io.mem.weightPRF.rf(0).rAddrExt, 3.U)
        expect(uut.io.mem.weightPRF.rf(0).bpSel.get, true.B)

        expect(uut.io.mem.actvtnPRF.rf(0).wEnable, true.B)
        expect(uut.io.mem.actvtnPRF.rf(0).rEnable, true.B)
        expect(uut.io.mem.actvtnPRF.rf(0).wAddr, 1.U)
        expect(uut.io.mem.actvtnPRF.rf(0).rAddrInt, 2.U)
        expect(uut.io.mem.actvtnPRF.rf(0).rAddrExt, 3.U)
        expect(uut.io.mem.actvtnPRF.rf(0).bpSel.get, true.B)

        expect(uut.io.proc.ipuBpSel.get(0), 1)
        expect(uut.io.proc.ipuBpSel.get(1), 0)

        expect(uut.io.proc.aluFSel(0), 1)
        expect(uut.io.proc.aluFSel(1), 0)
        expect(uut.io.proc.aluFSel(2), 0)
        expect(uut.io.proc.aluFSel(3), 0)

        // Member name is `scratchRF`, matching the state-1 checks below and
        // the PE module (previously misspelt as `sratchRF` here).
        expect(uut.io.mem.scratchRF.rf(0).wEnable, true.B)
        expect(uut.io.mem.scratchRF.rf(0).rEnable, true.B)
        expect(uut.io.mem.scratchRF.rf(0).wAddr, 1.U)
        expect(uut.io.mem.scratchRF.rf(0).rAddrInt, 2.U)
        expect(uut.io.mem.scratchRF.rf(0).rAddrExt, 3.U)
        expect(uut.io.mem.scratchRF.rf(0).bpSel.get, true.B)

        expect(uut.io.proc.nluFSel(0), 1)
        expect(uut.io.proc.nluFSel(1), 0)

        // ---- state 1: expect the `.otherwise` pattern of every decode function ----
        poke(uut.io.state, 1.U)
        step(1)

        expect(uut.io.mem.weightPRF.rf(0).wEnable, false.B)
        expect(uut.io.mem.weightPRF.rf(0).rEnable, false.B)
        expect(uut.io.mem.weightPRF.rf(0).wAddr, 4.U)
        expect(uut.io.mem.weightPRF.rf(0).rAddrInt, 5.U)
        expect(uut.io.mem.weightPRF.rf(0).rAddrExt, 6.U)
        expect(uut.io.mem.weightPRF.rf(0).bpSel.get, false.B)

        expect(uut.io.mem.actvtnPRF.rf(0).wEnable, false.B)
        expect(uut.io.mem.actvtnPRF.rf(0).rEnable, false.B)
        expect(uut.io.mem.actvtnPRF.rf(0).wAddr, 4.U)
        expect(uut.io.mem.actvtnPRF.rf(0).rAddrInt, 5.U)
        expect(uut.io.mem.actvtnPRF.rf(0).rAddrExt, 6.U)
        expect(uut.io.mem.actvtnPRF.rf(0).bpSel.get, false.B)

        expect(uut.io.proc.ipuBpSel.get(0), 0)
        expect(uut.io.proc.ipuBpSel.get(1), 1)

        expect(uut.io.proc.aluFSel(0), 0)
        expect(uut.io.proc.aluFSel(1), 1)
        expect(uut.io.proc.aluFSel(2), 0)
        expect(uut.io.proc.aluFSel(3), 0)

        expect(uut.io.mem.scratchRF.rf(0).wEnable, false.B)
        expect(uut.io.mem.scratchRF.rf(0).rEnable, false.B)
        expect(uut.io.mem.scratchRF.rf(0).wAddr, 4.U)
        expect(uut.io.mem.scratchRF.rf(0).rAddrInt, 5.U)
        expect(uut.io.mem.scratchRF.rf(0).rAddrExt, 6.U)
        expect(uut.io.mem.scratchRF.rf(0).bpSel.get, false.B)

        expect(uut.io.proc.nluFSel(0), 0)
        expect(uut.io.proc.nluFSel(1), 1)

    }
}


## PE

#### Definition

In [None]:
// Processing element.
//
// Instantiates a state machine, a decoder, the weight and activation PRFs, the
// IPU, the ALU, the scratch RF and the NLU, and wires them together:
//   io.stateCtrl -> state machine -> decoder -> control inputs of every unit
//   PRF `int` outputs -> IPU -> ALU -> scratch RF -> NLU -> io.totalOutput
// The PRF and scratch RF `ext` outputs are exposed directly on the module IO.
class PE(c: PEConfig) extends Module {

    // Shorthand handles for the three memory configurations
    val cw = c.weightPRFConfig
    val ca = c.actvtnPRFConfig
    val cs = c.scratchRFConfig

    val io = IO(new Bundle {
        val stateCtrl = Input(UInt(c.smConfig.ctrlWidth.W))
        val toWeightPRF = Input(new PRFInput(cw))
        val toActvtnPRF = Input(new PRFInput(ca))
        val toScratchRF = Input(SInt(cs.dataWidth.W))
        val fromWeightPRF = Output(Vec(cw.ports, SInt(cw.dataWidth.W)))
        val fromActvtnPRF = Output(Vec(ca.ports, SInt(ca.dataWidth.W)))
        val fromScratchRF = Output(SInt(cs.dataWidth.W))
        val totalOutput = Output(SInt(c.nluConfig.dataWidth.W))
    })

    // Control path: external control -> state machine -> decoder
    val stateMachine = Module(new StateMachine(c.smConfig))
    stateMachine.io.control := io.stateCtrl

    val decoder = Module(new Decoder(c))
    decoder.io.state := stateMachine.io.out

    // Weight PRF: each register file's `ext` output goes to the module IO
    val weightPRF = Module(new PRF(cw))
    weightPRF.io.control <> decoder.io.mem.weightPRF
    weightPRF.io.in <> io.toWeightPRF
    weightPRF.io.out.rf.zipWithIndex.foreach {
        case (port: RFOutput, idx: Int) => io.fromWeightPRF(idx) := port.ext
    }

    // Activation PRF: same wiring as the weight PRF
    val actvtnPRF = Module(new PRF(ca))
    actvtnPRF.io.control <> decoder.io.mem.actvtnPRF
    actvtnPRF.io.in <> io.toActvtnPRF
    actvtnPRF.io.out.rf.zipWithIndex.foreach {
        case (port: RFOutput, idx: Int) => io.fromActvtnPRF(idx) := port.ext
    }

    // IPU: fed by the `int` outputs of both PRFs; bpSel is only driven when the IPU has one
    val ipu = Module(new IPU(c.ipuConfig))
    ipu.io.bpSel.foreach { sel => sel := decoder.io.proc.ipuBpSel.get }
    weightPRF.io.out.rf.zipWithIndex.foreach {
        case (port: RFOutput, idx: Int) => ipu.io.dataIn.weight(idx) := port.int
    }
    actvtnPRF.io.out.rf.zipWithIndex.foreach {
        case (port: RFOutput, idx: Int) => ipu.io.dataIn.actvtn(idx) := port.int
    }

    val alu = Module(new ALU(c.aluConfig))
    alu.io.in.funcSel := decoder.io.proc.aluFSel
    alu.io.in.ipu <> ipu.io.dataOut

    // Scratch RF: data input 0 is the external input, data input 1 is the ALU
    // result. Its `int` output feeds the ALU feedback port (when the ALU has
    // one) and the NLU; its `ext` output goes to the module IO.
    val scratchRF = Module(new RF(cs))
    scratchRF.io.control <> decoder.io.mem.scratchRF
    scratchRF.io.in.data(0) := io.toScratchRF
    scratchRF.io.in.data(1) := alu.io.out
    io.fromScratchRF := scratchRF.io.out.ext
    alu.io.in.rfFeedback.foreach { feedback => feedback := scratchRF.io.out.int }

    val nlu = Module(new NLU(c.nluConfig))
    nlu.io.in.fSel := decoder.io.proc.nluFSel
    nlu.io.in.data := scratchRF.io.out.int
    io.totalOutput := nlu.io.out
}

#### Verification

In [None]:
// Example PE configuration used to elaborate the PE module in the driver call
// that follows. The sub-configurations are named locally inside the block so
// that only `examplePEConfig10` is introduced at the top level.
val examplePEConfig10 = {
    // Memory configurations (two separate PRFConfig instances with equal arguments)
    val weightPRFConfig = new PRFConfig(2, 8, 4, 1, "Soft")
    val actvtnPRFConfig = new PRFConfig(2, 8, 4, 1, "Soft")
    // NOTE(review): the RFConfig class at the top of this notebook takes seven
    // parameters; confirm this four-argument call still matches the constructor.
    val scratchRFConfig = new RFConfig(8, 4, 2, false)

    // Processing configurations
    val ipuConfig = new IPUConfig(2, 8, "Firm")
    val aluConfig = new ALUConfig(8, List("Identity", "Add", "Max", "Accumulate"))
    val nluConfig = new NLUConfig(8, List("Identity", "ReLu"))

    // State machine configuration
    val smConfig = new StateMachineConfig(4, 4, exampleStateMap)

    new PEConfig(
        weightPRFConfig,
        actvtnPRFConfig,
        scratchRFConfig,
        ipuConfig,
        aluConfig,
        nluConfig,
        smConfig,
        exampleDecodeWeightPRF,
        exampleDecodeActvtnPRF,
        exampleDecodeScratchRF,
        exampleDecodeIPU,
        exampleDecodeALU,
        exampleDecodeNLU
    )
}

// Runs the Driver on a PE built from examplePEConfig10. The tester body is
// empty: no signals are poked and nothing is checked.
Driver(() => new PE(examplePEConfig10)) { dut =>
    new PeekPokeTester(dut) {
        // no pokes or expectations yet
    }
}

## Future Plans
* Verify everything using Golden Models