# An Example Design Flow

## Setup

In [None]:
// Locate the load-ivy.sc script in the notebook's working directory and load it
// into the interpreter session.
val ivy_path = s"${System.getProperty("user.dir")}/load-ivy.sc"
val ivyScriptPath = java.nio.file.Paths.get(ivy_path)
interp.load.module(ammonite.ops.Path(ivyScriptPath))

In [None]:
import java.util.Date
import chisel3.iotesters
import chisel3.iotesters.PeekPokeTester
import pillars.archlib.TileLSUBlock
import pillars.core._
import pillars.hardware.{SynthesizedModule, TopModule}
import pillars.mapping.{DFG, DotReader, ILPMap, OmtMap, SearchMap}
import pillars.testers.{AppTestHelper, ApplicationTester}

import chiseltest._
import chiseltest.iotesters.PeekPokeTester
import chiseltest.simulator.VerilatorBackendAnnotation
import org.scalatest.flatspec.AnyFlatSpec
import sys.process._

## Define an end-to-end flow

In [None]:
/** An end-to-end tutorial flow: describe an architecture, map a DFG onto its MRRG,
 * build the top hardware design and simulate it with generated runtime information.
 */
object Tutorial {
  /** Runs the whole flow once with the settings hard-coded below.
   *
   * @param args command-line arguments; not used by this flow
   */
  def main(args: Array[String]): Unit = {
    /** Prepare runtime information manually.
     *
     * Fails with an IllegalArgumentException when the DFG does not contain the
     * operators this tutorial relies on (at least one INCR, INPUT and OUTPUT
     * operator, and no more CONST operators than prepared constant values).
     *
     * @param dfg     the data-flow graph
     * @param numSRAM the number of SRAM in a CGRA tile
     * @return the runtime information
     */
    def prepareRuntimeInfo(dfg: DFG, numSRAM: Int) = {
      val dataSize = 50
      //Two random input vectors whose elements are in [0, 1000).
      val VectorA = Array.fill(dataSize)(scala.util.Random.nextInt(1000))
      val VectorB = Array.fill(dataSize)(scala.util.Random.nextInt(1000))

      //Input random indexes into the mapped input port in CGRA,
      // and get A(index) + B(index) from the mapped output port.
      val inputIndexes = scala.util.Random.shuffle((0 until dataSize).toList)
      //Map over the shuffled list directly; positional access on a List is linear.
      val expectedRet = inputIndexes.map(index => VectorA(index) + VectorB(index))

      //The base address of A and B in SRAM of an LSU.
      //To simplify the problem, we assume both A and B are stored
      //in all SRAMs belonging to 4 LSUs in the targeted architecture.
      val a_base = 0
      val b_base = dataSize

      //The value of const operators.
      val const0 = a_base
      val const1 = b_base
      val const2 = dataSize - 1
      val const3 = a_base
      val const4 = a_base
      val constVals = Array(const0, const1, const2, const3, const4)

      //Constant values are assigned to the CONST operators in DFG order.
      val constOpNames = dfg.opNodes.filter(op => op.opcode == OpEnum.CONST).map(op => op.name)
      require(constOpNames.size <= constVals.length,
        s"The DFG has ${constOpNames.size} CONST operators but only ${constVals.length} constant values are prepared.")
      val constValue = constOpNames.zip(constVals)
        .map { case (name, value) => ConstValue(name, value) }.toList

      //Operator incr0 should generate (j <- 0 until dataSize).
      //So the parameter of the counter is (init = 0, step = 1, end = dataSize, freq = 1)
      val counterOpNames = dfg.opNodes.filter(op => op.opcode == OpEnum.INCR).map(op => op.name)
      require(counterOpNames.nonEmpty, "The DFG must contain at least one operator with INCR opcode.")
      val counterConfig = List(CounterConfig(counterOpNames.head, 0, 1, dataSize, 1))

      //In this simple tutorial, A and B are put into all LSUs.
      //But you can put them into partial LSUs according to the mapping results,
      // just like what in the ApplicationExamples.
      //Because the PEs in a row share an LSU, the number of LSUs is rowNum.
      val inputToSRAM = (0 until numSRAM).map(i => InputToSRAM(i, a_base, VectorA.toList)).toList :::
        (0 until numSRAM).map(i => InputToSRAM(i, b_base, VectorB.toList)).toList

      //The reversed A is expected at a_base of SRAM 3.
      val outputFromSRAM = List(OutputFromSRAM(3, a_base, VectorA.reverse.toList))

      //Only the first operator with INPUT opcode is fed (with the shuffled indexes).
      val inputOpNames = dfg.opNodes.filter(op => op.opcode == OpEnum.INPUT).map(op => op.name)
      require(inputOpNames.nonEmpty, "The DFG must contain at least one operator with INPUT opcode.")
      val inputToPort = List(InputToPort(inputOpNames.head, inputIndexes))

      //Only the first operator with OUTPUT opcode is checked against the expected sums.
      val outputOpNames = dfg.opNodes.filter(op => op.opcode == OpEnum.OUTPUT).map(op => op.name)
      require(outputOpNames.nonEmpty, "The DFG must contain at least one operator with OUTPUT opcode.")
      val outputFromPort = List(OutputFromPort(outputOpNames.head, expectedRet))

      RuntimeInfo(inputToPort, outputFromPort, inputToSRAM
        , outputFromSRAM, constValue, counterConfig)
    }

    val rowNum = 4
    val colNum = 4
    val inputPort = 4
    val outputPort = 4
    val dataWidth = 32

    //Initialize the top block.
    val arch = new ArchitectureHierarchy()
    arch.addInPorts((0 until inputPort).map(i => s"input_$i").toArray)
    arch.addOutPorts((0 until outputPort).map(i => s"out_$i").toArray)

    val tile = new TileLSUBlock("tile_0", colNum, rowNum, inputPort, outputPort,
      useMuxBypass = false, complex = true, isToroid = false, useCounter = true, dataWidth = dataWidth)
    arch.addBlock(tile)

    //Wire every top-level port to the port of the same name on the tile.
    (0 until inputPort).foreach(i =>
      arch.addConnect(arch.term(s"input_$i") -> tile / s"input_$i"))
    (0 until outputPort).foreach(i =>
      arch.addConnect(tile / s"out_$i" -> arch.term(s"out_$i")))
    arch.init()

    //Get MRRG and mapping.
    //You can also use dumpMRRG(targetedII, filename) to save the MRRG,
    // and use loadTXT(mrrgFilename) to load the MRRG.
    val II = 1
    val MRRG = arch.getMRRG(II)
    val dfgFilename = "Vadd_Reverse.dot"
    val dfg = DotReader.loadDot(dfgFilename, II)
    val mappingResultFilename = s"ii$II"

    object Solver extends Enumeration {
      val Gurobi, Search, Z3Prover = Value
    }
    val solver = Solver.Search
    val separatedPR = true
    val scheduleControl = true

    //Use the monotonic clock for the elapsed time; the printed value is in milliseconds.
    val startTime = System.nanoTime()
    solver match {
      case Solver.Gurobi => ILPMap.mapping(dfg, MRRG, filename = mappingResultFilename, separatedPR = separatedPR, scheduleControl = scheduleControl, skewLimit = 4, latencyLimit = 15)
      case Solver.Search => SearchMap.mapping(dfg, MRRG, mappingResultFilename, scheduleControl = scheduleControl, skewLimit = 4)
      case Solver.Z3Prover => OmtMap.mapping(dfg, MRRG, filename = mappingResultFilename, separatedPR = separatedPR, scheduleControl = scheduleControl, skewLimit = 4, latencyLimit = 15)
    }
    val elapsedMillis = (System.nanoTime() - startTime) / 1000000L
    println("Mapping runtime: " + elapsedMillis)

    // PillarsConfig.USE_TOKEN = true

    //Generate the top design.
    val connect = new Connect(arch.connectArray)
    val hardwareGenerator = new HardwareGenerator(arch, connect)
    val topDesign = () => new TopModule(hardwareGenerator.pillarsModuleInfo,
      hardwareGenerator.connectMap, hardwareGenerator.regionList, dataWidth)

    //Generate the RTL codes.
    //chisel3.emitVerilog(topDesign(), Array("-td", "RTL/"))

    //Simulate with the mapping result.
    //The runtime information is written to a JSON file and read back from it.
    JsonParser.writeJson(prepareRuntimeInfo(dfg, rowNum), "runtime.json")
    val runtimeInfo = JsonParser.readJson("runtime.json")

    //Simulation settings.
    val simulationHelper = new SimulationHelper(arch)
    val resultFilename = s"ii${II}_r.txt"
    simulationHelper.init(resultFilename, runtimeInfo, II)

    val appTestHelper = new AppTestHelper(II)
    val moduleInfoFilename = s"ii${II}_i.txt"
    appTestHelper.init(arch, simulationHelper, moduleInfoFilename, runtimeInfo)

    //JsonParser.dumpRuntimeInfo(simulationHelper, appTestHelper, dfg)

    //Run the tester on the generated design with the Verilator backend.
    org.scalatest.run(new AnyFlatSpec with ChiselScalatestTester {
      it should "work" in {
        test(topDesign())
        .withAnnotations(Seq(VerilatorBackendAnnotation))
        .runPeekPoke(new VaddReverseTester(_, appTestHelper))
      }
    })
  }
}

/** A tester for vec-add + vec-reverse application.
 *
 * The test sequence runs in the constructor body: the design is held with
 * `en` low while data and configuration are supplied, then `en` is raised and
 * the results are checked. The helper methods used here come from
 * ApplicationTester, which is not visible in this file.
 *
 * @param c             the top design
 * @param appTestHelper the class which is helpful when creating testers
 */
class VaddReverseTester(c: TopModule, appTestHelper: AppTestHelper)
  extends ApplicationTester(c, appTestHelper) {

  //Drive the enable signal low before supplying data and configuration.
  poke(c.io.en, 0)
  //Presumably loads the prepared input data into the design - confirm in ApplicationTester.
  inputData()
  //The II value reported by the helper is used for both configuration and checking.
  val testII = appTestHelper.getTestII()
  inputConfig(testII)
  //Drive the enable signal high, then check the port outputs.
  poke(c.io.en, 1)
  checkPortOutsWithInput(testII)

  //Wait for the reverse part to finish (10 steps) before checking the LSU data.
  step(10)
  checkLSUData()
}

## Execute the flow

In [None]:
// Run the tutorial flow; it takes no command-line arguments.
Tutorial.main(Array.empty[String])