## Agile Hardware Design
***
# Arbitration

<img src="./images/logo.svg" alt="agile hardware design logo" style="float:right"/>

## By Peter Hanping Chen based on 
## Prof. Scott Beamer
## sbeamer@ucsc.edu
## [CSE 228A](https://classes.soe.ucsc.edu/cse228a/Winter24/)
###
### Note:
### This chisel code is modified based on Scala configuration file: load-ivy.sc
### https://github.com/freechipsproject/chisel-bootcamp

## Plan for Today

* One-hot encoding
* Priority encoders
* Arbiters
* Crossbar Example

## Loading The Chisel Library Into a Notebook

In [1]:
//interp.load.module(os.Path(s"${System.getProperty("user.dir")}/../resource/chisel_deps.sc"))
val path = System.getProperty("user.dir") + "/source/load-ivy.sc"
//val path = System.getProperty("user.dir") + "/source/chisel_deps.sc"
println("path: "+path)

path: /home/peter/AIU/AIU_CS800_Chisel/500_UCSC_HWD/008_Arbit/001_Code/source/load-ivy.sc


[36mpath[39m: [32mString[39m = [32m"/home/peter/AIU/AIU_CS800_Chisel/500_UCSC_HWD/008_Arbit/001_Code/source/load-ivy.sc"[39m

In [2]:
interp.load.module(ammonite.ops.Path(java.nio.file.FileSystems.getDefault().getPath(path)))

Checking https://repo1.maven.org/maven2/edu/berkeley/cs/chisel3_2.12/maven-metadata.xml
Checked https://repo1.maven.org/maven2/edu/berkeley/cs/chisel3_2.12/maven-metadata.xml
Checking https://repo1.maven.org/maven2/edu/berkeley/cs/chisel-iotesters_2.12/maven-metadata.xml
Checked https://repo1.maven.org/maven2/edu/berkeley/cs/chisel-iotesters_2.12/maven-metadata.xml
Checking https://repo1.maven.org/maven2/edu/berkeley/cs/chiseltest_2.12/maven-metadata.xml
Checked https://repo1.maven.org/maven2/edu/berkeley/cs/chiseltest_2.12/maven-metadata.xml
Checking https://repo1.maven.org/maven2/edu/berkeley/cs/dsptools_2.12/maven-metadata.xml
Checked https://repo1.maven.org/maven2/edu/berkeley/cs/dsptools_2.12/maven-metadata.xml
Checking https://repo1.maven.org/maven2/edu/berkeley/cs/firrtl-diagrammer_2.12/maven-metadata.xml
Checked https://repo1.maven.org/maven2/edu/berkeley/cs/firrtl-diagrammer_2.12/maven-metadata.xml


In [3]:
import chisel3._
import chisel3.util._
import chiseltest._
import chiseltest.RawTester.test

[32mimport [39m[36mchisel3._
[39m
[32mimport [39m[36mchisel3.util._
[39m
[32mimport [39m[36mchiseltest._
[39m
[32mimport [39m[36mchiseltest.RawTester.test[39m

## One-Hot Encoding

* Collection of wires where _**exactly**_ one wire is high (rest are low)

* Helpful for working with a collection of objects in which you only want one to be active/selected/enabled

* Examples
  * Setting the write enable high for the target register in register file
  * Charging the appropriate word line in a SRAM (often called a _decoder_)

* Can often avoid need to encode/decode because both producers and consumers of one-hot (OH) encoding may prefer it

<img src="images/decoder.svg" alt="xbar schematic" style="width:30%;margin-left:auto;margin-right:auto"/>

## Implementing Our Own One-Hot Encoder (1/2)

In [4]:
class ConvUIntToOH(inWidth: Int) extends Module {
    val outWidth = 1 << inWidth
    val io = IO(new Bundle {
        val in  = Input(UInt(inWidth.W))
        val out = Output(UInt(outWidth.W))
    })
    require (inWidth > 0)
    def helper(index: Int): UInt = {
        if (index < outWidth-1) Cat(helper(index+1), io.in === index.U)
        else io.in === index.U
    }
    io.out := helper(0)
//     io.out := UIntToOH(io.in)  // Standard library implementation
    printf("%d -> %b\n", io.in, io.out)
}

defined [32mclass[39m [36mConvUIntToOH[39m

## Implementing Our Own One-Hot Encoder (2/2)

In [5]:
//printVerilog(new ConvUIntToOH(2))
println (getVerilog(new ConvUIntToOH(2)))

Elaborating design...
Done elaborating.
module ConvUIntToOH(
  input        clock,
  input        reset,
  input  [1:0] io_in,
  output [3:0] io_out
);
  wire  _T = io_in == 2'h3; // @[cmd3.sc 10:20]
  wire  _T_1 = io_in == 2'h2; // @[cmd3.sc 9:60]
  wire  _T_3 = io_in == 2'h1; // @[cmd3.sc 9:60]
  wire [2:0] _T_4 = {_T,_T_1,_T_3}; // @[Cat.scala 30:58]
  wire  _T_5 = io_in == 2'h0; // @[cmd3.sc 9:60]
  assign io_out = {_T_4,_T_5}; // @[Cat.scala 30:58]
  always @(posedge clock) begin
    `ifndef SYNTHESIS
    `ifdef PRINTF_COND
      if (`PRINTF_COND) begin
    `endif
        if (~reset) begin
          $fwrite(32'h80000002,"%d -> %b\n",io_in,io_out); // @[cmd3.sc 14:11]
        end
    `ifdef PRINTF_COND
      end
    `endif
    `endif // SYNTHESIS
  end
endmodule



In [6]:
// 3 bits of Verilog
println (getVerilog(new ConvUIntToOH(3)))

Elaborating design...
Done elaborating.
module ConvUIntToOH(
  input        clock,
  input        reset,
  input  [2:0] io_in,
  output [7:0] io_out
);
  wire  _T = io_in == 3'h7; // @[cmd3.sc 10:20]
  wire  _T_1 = io_in == 3'h6; // @[cmd3.sc 9:60]
  wire  _T_3 = io_in == 3'h5; // @[cmd3.sc 9:60]
  wire  _T_5 = io_in == 3'h4; // @[cmd3.sc 9:60]
  wire  _T_7 = io_in == 3'h3; // @[cmd3.sc 9:60]
  wire  _T_9 = io_in == 3'h2; // @[cmd3.sc 9:60]
  wire  _T_11 = io_in == 3'h1; // @[cmd3.sc 9:60]
  wire [6:0] _T_12 = {_T,_T_1,_T_3,_T_5,_T_7,_T_9,_T_11}; // @[Cat.scala 30:58]
  wire  _T_13 = io_in == 3'h0; // @[cmd3.sc 9:60]
  assign io_out = {_T_12,_T_13}; // @[Cat.scala 30:58]
  always @(posedge clock) begin
    `ifndef SYNTHESIS
    `ifdef PRINTF_COND
      if (`PRINTF_COND) begin
    `endif
        if (~reset) begin
          $fwrite(32'h80000002,"%d -> %b\n",io_in,io_out); // @[cmd3.sc 14:11]
        end
    `ifdef PRINTF_COND
      end
    `endif
    `endif // SYNTHESIS
  end
endmodule



### Perform the One-Hot-Encoding Simulation ###

In [7]:
// 2 bits One-Hot-Encoding
test(new ConvUIntToOH(2)) { c =>
    for (i <- 0 until 4) {
         c.io.in.poke(i.U)
         c.io.out.expect((1 << i).U)
         c.clock.step()
     }
}

Elaborating design...
Done elaborating.
 0 ->    1
 1 ->   10
 2 ->  100
 3 -> 1000
 0 ->    1
test ConvUIntToOH Success: 0 tests passed in 6 cycles in 0.037580 seconds 159.66 Hz


In [8]:
//3 bits of One-Hot-Encdoing
test(new ConvUIntToOH(3)) { c =>
    for (i <- 0 until 4) {
         c.io.in.poke(i.U)
         c.io.out.expect((1 << i).U)
         c.clock.step()
     }
}

Elaborating design...
Done elaborating.
 0 ->        1
 1 ->       10
 2 ->      100
 3 ->     1000
 0 ->        1
test ConvUIntToOH Success: 0 tests passed in 6 cycles in 0.007828 seconds 766.48 Hz


In [8]:
//printVerilog(new ConvUIntToOH(2))

// test(new ConvUIntToOH(2)) { c =>
//    for (i <- 0 until 4) {
//         c.io.in.poke(i.U)
//         c.io.out.expect((1 << i).U)
//         c.clock.step()
//     }
// }

### Simplify One-Hot Encoder ###
We can remove helper function and just shift one bit to the output.

In [9]:
class ConvUIntToOH_2(inWidth: Int) extends Module {
    val outWidth = 1 << inWidth
    val io = IO(new Bundle {
        val in  = Input(UInt(inWidth.W))
        val out = Output(UInt(outWidth.W))
    })
    require (inWidth > 0)
    //def helper(index: Int): UInt = {
    //    if (index < outWidth-1) Cat(helper(index+1), io.in === index.U)
    //    else io.in === index.U
    //}
    //io.out := helper(0)
    io.out := UIntToOH(io.in)  // Standard library implementation
    printf("%d -> %b\n", io.in, io.out)
}

defined [32mclass[39m [36mConvUIntToOH_2[39m

In [10]:
println (getVerilog(new ConvUIntToOH_2(3)))

Elaborating design...
Done elaborating.
module ConvUIntToOH_2(
  input        clock,
  input        reset,
  input  [2:0] io_in,
  output [7:0] io_out
);
  assign io_out = 8'h1 << io_in; // @[OneHot.scala 58:35]
  always @(posedge clock) begin
    `ifndef SYNTHESIS
    `ifdef PRINTF_COND
      if (`PRINTF_COND) begin
    `endif
        if (~reset) begin
          $fwrite(32'h80000002,"%d -> %b\n",io_in,io_out); // @[cmd8.sc 14:11]
        end
    `ifdef PRINTF_COND
      end
    `endif
    `endif // SYNTHESIS
  end
endmodule



In [11]:
test(new ConvUIntToOH_2(3)) { c =>
    for (i <- 0 until 4) {
         c.io.in.poke(i.U)
         c.io.out.expect((1 << i).U)
         c.clock.step()
     }
}

Elaborating design...
Done elaborating.
 0 ->        1
 1 ->       10
 2 ->      100
 3 ->     1000
 0 ->        1
test ConvUIntToOH_2 Success: 0 tests passed in 6 cycles in 0.007048 seconds 851.35 Hz


## Priority Encoder

* Given collection of wires, returns index of least significant bit that is high (1) given predefined precedence ordering (_priority_)

* Helpful for ordering logic or choosing between things

* Examples
  * Resolving RAW hazard in a pipelined processor, forward data from most recent instruction
  * In a collection components, find first free slot

* Chisel provides result as an index with [`PriorityEncoder`](https://javadoc.io/doc/edu.berkeley.cs/chisel3_2.13/latest/chisel3/util/PriorityEncoder\$.html),
one-hot with [`PriorityEncoderOH`](https://javadoc.io/doc/edu.berkeley.cs/chisel3_2.13/latest/chisel3/util/PriorityEncoderOH\$.html), or even integrated into a Mux with [`PriorityMux`](https://javadoc.io/doc/edu.berkeley.cs/chisel3_2.13/latest/chisel3/util/PriorityMux\$.html)
  * _What if input is 0?_ invalid, but returns max index or 0 (for OH)


## One-Hot Priority Encoders ##

<img src="images/priority.svg" alt="priority schematic" style="width:70%; align: left"/>

## Implement One-Hot Priority Encoder ##

### 1.1 Select with Gate: Chisel Implementation ### 

### 1.2 Select with Gate: Generate Hardware Verilog ###

In [45]:
// Select with Gate: Generate Hardware Verilog
println (getVerilog(new MyPriEncodeOH(2)))

Elaborating design...
Done elaborating.
module MyPriEncodeOH(
  input        clock,
  input        reset,
  input  [1:0] io_in,
  output [1:0] io_out
);
  wire  _T_1 = ~io_in[0]; // @[cmd43.sc 8:51]
  wire  _T_4 = io_in[1] & _T_1; // @[cmd43.sc 9:27]
  assign io_out = {_T_4,io_in[0]}; // @[Cat.scala 30:58]
  always @(posedge clock) begin
    `ifndef SYNTHESIS
    `ifdef PRINTF_COND
      if (`PRINTF_COND) begin
    `endif
        if (~reset) begin
          $fwrite(32'h80000002,"%b -> %b\n",io_in,io_out); // @[cmd43.sc 18:11]
        end
    `ifdef PRINTF_COND
      end
    `endif
    `endif // SYNTHESIS
  end
endmodule



### 1.3 Select with Gate: Test Sismulaiton ###

In [46]:
// Select with Gate: Test Simulation
test(new MyPriEncodeOH(3)) { c =>
   for (i <- 0 until 8) {
       c.io.in.poke(i.U)
       c.clock.step()
   }
}

Elaborating design...
Done elaborating.
  0 ->   0
  1 ->   1
 10 ->  10
 11 ->   1
100 -> 100
101 ->   1
110 ->  10
111 ->   1
  0 ->   0
test MyPriEncodeOH Success: 0 tests passed in 10 cycles in 0.006208 seconds 1610.92 Hz


### 2.1 Select with Mux: Chisel Implementation ###

In [49]:
// Select with Muxes
class MyPriEncodeOH(n: Int) extends Module {
    val io = IO(new Bundle {
        val in  = Input(UInt(n.W))
        val out = Output(UInt())
    })
    require (n > 0)
    def withGates(index: Int, expr: UInt): UInt = {
        if (index < (n-1)) Cat(withGates(index+1, ~io.in(index) & expr), io.in(index) & expr)
        else io.in(index) & expr
    }
    def withMuxes(index: Int): UInt = {
        if (index < n) Mux(io.in(index), (1 << index).U, withMuxes(index+1))
        else 0.U
    }
//  io.out := withGates(0, 1.U)
    io.out := withMuxes(0)
//     io.out := PriorityEncoderOH(io.in)  // Use Chisel Library
    printf("%b -> %b\n", io.in, io.out)
}

defined [32mclass[39m [36mMyPriEncodeOH[39m

### 2.2 Select with Mux: Generate Hardware Verilog ###

In [43]:
// Select with Muxes
println (getVerilog(new MyPriEncodeOH(2)))

Elaborating design...
Done elaborating.
module MyPriEncodeOH(
  input        clock,
  input        reset,
  input  [1:0] io_in,
  output [1:0] io_out
);
  wire [1:0] _T_2 = io_in[1] ? 2'h2 : 2'h0; // @[cmd39.sc 12:27]
  assign io_out = io_in[0] ? 2'h1 : _T_2; // @[cmd39.sc 12:27]
  always @(posedge clock) begin
    `ifndef SYNTHESIS
    `ifdef PRINTF_COND
      if (`PRINTF_COND) begin
    `endif
        if (~reset) begin
          $fwrite(32'h80000002,"%b -> %b\n",io_in,io_out); // @[cmd39.sc 17:11]
        end
    `ifdef PRINTF_COND
      end
    `endif
    `endif // SYNTHESIS
  end
endmodule



### 2.3 Select with Mux: Test Sismulaiton ###

In [42]:
test(new MyPriEncodeOH(3)) { c =>
   for (i <- 0 until 8) {
       c.io.in.poke(i.U)
       c.clock.step()
   }
}

Elaborating design...
Done elaborating.
  0 ->   0
  1 ->   1
 10 ->  10
 11 ->   1
100 -> 100
101 ->   1
110 ->  10
111 ->   1
  0 ->   0
test MyPriEncodeOH Success: 0 tests passed in 10 cycles in 0.006256 seconds 1598.44 Hz


### 3.1 Select with Chisel Library: Chisel Implementation ###

In [53]:
// Select with Chisel Library
class MyPriEncodeOH(n: Int) extends Module {
    val io = IO(new Bundle {
        val in  = Input(UInt(n.W))
        val out = Output(UInt())
    })
    require (n > 0)
    def withGates(index: Int, expr: UInt): UInt = {
        if (index < (n-1)) Cat(withGates(index+1, ~io.in(index) & expr), io.in(index) & expr)
        else io.in(index) & expr
    }
    def withMuxes(index: Int): UInt = {
        if (index < n) Mux(io.in(index), (1 << index).U, withMuxes(index+1))
        else 0.U
    }
//  io.out := withGates(0, 1.U)
    //io.out := withMuxes(0)
    io.out := PriorityEncoderOH(io.in)  // Use Chisel Library
    printf("%b -> %b\n", io.in, io.out)
}

defined [32mclass[39m [36mMyPriEncodeOH[39m

### 3.2 Select with Chisel Library: Generate Hardware Verilog ###

In [54]:
// Select with Chisel Library
println (getVerilog(new MyPriEncodeOH(2)))

Elaborating design...
Done elaborating.
module MyPriEncodeOH(
  input        clock,
  input        reset,
  input  [1:0] io_in,
  output [1:0] io_out
);
  wire [1:0] _T_2 = io_in[1] ? 2'h2 : 2'h0; // @[Mux.scala 47:69]
  assign io_out = io_in[0] ? 2'h1 : _T_2; // @[Mux.scala 47:69]
  always @(posedge clock) begin
    `ifndef SYNTHESIS
    `ifdef PRINTF_COND
      if (`PRINTF_COND) begin
    `endif
        if (~reset) begin
          $fwrite(32'h80000002,"%b -> %b\n",io_in,io_out); // @[cmd52.sc 18:11]
        end
    `ifdef PRINTF_COND
      end
    `endif
    `endif // SYNTHESIS
  end
endmodule



### 3.3 Select with Chisel Library: Test Simulaiton ###

In [55]:
// Select with Chisel Library
test(new MyPriEncodeOH(3)) { c =>
   for (i <- 0 until 8) {
       c.io.in.poke(i.U)
       c.clock.step()
   }
}

Elaborating design...
Done elaborating.
  0 ->   0
  1 ->   1
 10 ->  10
 11 ->   1
100 -> 100
101 ->   1
110 ->  10
111 ->   1
  0 ->   0
test MyPriEncodeOH Success: 0 tests passed in 10 cycles in 0.006478 seconds 1543.60 Hz


In [12]:
//printVerilog(new MyPriEncodeOH(2))

// test(new MyPriEncodeOH(3)) { c =>
//     for (i <- 0 until 8) {
//         c.io.in.poke(i.U)
//         c.clock.step()
//     }
// }

## Arbiter

* _Arbitration_ is needed to choose between multiple components attempting to access a scarce resource

* Needs way to choose (_arbitrate_) if multiple simultaneous requests
  * If only one request, grant to lone requestor

* Different tie-breaking algorithms available e.g. fixed priority or round-robin
  * Consider needs for usage scenario

* Examples
  * Structural hazard in a processor, such as core & memory both trying to write to cache at same time
  * Output ports of a network switch (later today)

## Arbiters in Chisel

* Use `Decoupled` for both requestors and outcome
  * `valid` (from requestor) indicates if actually sending request
  * `ready` (to requestor) indicates request granted

1. Arbiter:fixed priority from least significant, e.g. port 0 wins

- https://javadoc.io/doc/edu.berkeley.cs/chisel3_2.13/latest/chisel3/util/Arbiter.html

2. RRArbiter: round robin for who wins ties

- https://javadoc.io/doc/edu.berkeley.cs/chisel3_2.13/latest/chisel3/util/RRArbiter.html

3. LockingRRArbiter: round robin, but "winner" granted out for `count` cycles

- https://javadoc.io/doc/edu.berkeley.cs/chisel3_2.13/latest/chisel3/util/LockingRRArbiter.html

<img src="images/arbiter.svg" alt="arbiter schematic" style="width:45%; align:left"/>

## 8.11 Chisel Arbiter Utility ##

### 8.11.1 Chisel Utility LockingRRArbiter(): Chisel Implementation ###

In [92]:
// Use Utility: LockingRRArbiter()
// Use <> (Not euqal to) arb.io.out
class UtilArbDemo(numPorts: Int, w: Int) extends Module {
    val io = IO(new Bundle {
        val req = Flipped(Vec(numPorts, Decoupled(UInt(w.W))))
        //val req = Vec(numPorts, Decoupled(UInt(w.W)))
        val out = Decoupled(UInt(w.W))
    })
    require (numPorts > 0)
    val arb = Module(new LockingRRArbiter(UInt(w.W), numPorts, 2))
    // We can comment out the 
    for (p <- 0 until numPorts) {
        arb.io.in(p) <> io.req(p) 
    }
    // Above loop does the same thing as below.
    //arb.io.in <> io.req
    io.out <> arb.io.out
    printf("requestor: ")
    for (p <- numPorts-1 to 0 by -1) {
        printf("%b", arb.io.in(p).valid)
    }
    printf(" winner: out.bits = %d (out.valid = %b)\n", arb.io.out.bits, arb.io.out.valid)
}

defined [32mclass[39m [36mUtilArbDemo[39m

### 8.11.2 Chisel Utility LockingRRArbiter(): Generate Hardware Verilog ###

In [93]:
// UtilArbDemo (num-Port, width)
println (getVerilog(new UtilArbDemo(2,8)))

Elaborating design...
Done elaborating.
module LockingRRArbiter(
  input        clock,
  input        reset,
  output       io_in_0_ready,
  input        io_in_0_valid,
  input  [7:0] io_in_0_bits,
  output       io_in_1_ready,
  input        io_in_1_valid,
  input  [7:0] io_in_1_bits,
  input        io_out_ready,
  output       io_out_valid,
  output [7:0] io_out_bits,
  output       io_chosen
);
`ifdef RANDOMIZE_REG_INIT
  reg [31:0] _RAND_0;
  reg [31:0] _RAND_1;
  reg [31:0] _RAND_2;
`endif // RANDOMIZE_REG_INIT
  reg  value; // @[Counter.scala 60:40]
  reg  lockIdx; // @[Arbiter.scala 46:22]
  wire  _T = io_out_ready & io_out_valid; // @[Decoupled.scala 40:37]
  reg  lastGrant; // @[Reg.scala 15:16]
  wire  grantMask_1 = 1'h1 > lastGrant; // @[Arbiter.scala 67:49]
  wire  validMask_1 = io_in_1_valid & grantMask_1; // @[Arbiter.scala 68:75]
  wire  _GEN_8 = io_in_0_valid ? 1'h0 : 1'h1; // @[Arbiter.scala 77:27 Arbiter.scala 77:36]
  wire  choice = validMask_1 | _GEN_8; // @[Arbiter

### 8.11.3 Chisel Utility LockingRRArbiter(): Run Simulation ###

In [95]:
// Test Simulation: UtilArbDemo (num_port=4, width=8)
// printVerilog(new UtilArbDemo(2,8))
// We only check the resule for valid = 1.
val numPorts = 4
test(new UtilArbDemo(numPorts,8)) { c =>
    c.io.out.ready.poke(true.B)
    for (cycle <- 0 until 5) {
        for (p <- 0 until numPorts) {
            c.io.req(p).bits.poke(p.U)
            c.io.req(p).valid.poke((p >= cycle).B)
        }
        c.clock.step()
    }
}

Elaborating design...
Done elaborating.
requestor: 1111 winner: out.bits =    1 (out.valid = 1)
requestor: 1110 winner: out.bits =    1 (out.valid = 1)
requestor: 1100 winner: out.bits =    2 (out.valid = 1)
requestor: 1000 winner: out.bits =    2 (out.valid = 0)
requestor: 0000 winner: out.bits =    2 (out.valid = 0)
requestor: 0000 winner: out.bits =    0 (out.valid = 0)
test UtilArbDemo Success: 0 tests passed in 7 cycles in 0.007011 seconds 998.48 Hz


[36mnumPorts[39m: [32mInt[39m = [32m4[39m

## Original ArbDemo: Arbiter Chisel Code (1/3)

In [103]:
class ArbDemo(numPorts: Int, w: Int) extends Module {
    val io = IO(new Bundle {
        val req = Flipped(Vec(numPorts, Decoupled(UInt(w.W))))
        val out = Decoupled(UInt(w.W))
    })
    require (numPorts > 0)
    val arb = Module(new MyArb(numPorts,w))
    for (p <- 0 until numPorts) {
        arb.io.req(p) <> io.req(p) 
    }
    io.out <> arb.io.out
    printf("req: ")
    for (p <- numPorts-1 to 0 by -1) {
        printf("%b", arb.io.req(p).valid)
    }
    printf(" winner: %d (v: %b)\n", arb.io.out.bits, arb.io.out.valid)
}

defined [32mclass[39m [36mArbDemo[39m

## Original ArbDemo: Print Verilog of Our Arbiter (2/3)

In [104]:
println (getVerilog(new ArbDemo(2,8)))

Elaborating design...
Done elaborating.
module MyArb(
  output       io_req_0_ready,
  input        io_req_0_valid,
  input  [7:0] io_req_0_bits,
  output       io_req_1_ready,
  input        io_req_1_valid,
  input  [7:0] io_req_1_bits,
  input        io_out_ready,
  output       io_out_valid,
  output [7:0] io_out_bits
);
  wire [1:0] _enc_T = io_req_1_valid ? 2'h2 : 2'h0; // @[Mux.scala 47:69]
  wire [1:0] enc = io_req_0_valid ? 2'h1 : _enc_T; // @[Mux.scala 47:69]
  wire  chosenOH_0 = enc[0]; // @[OneHot.scala 83:30]
  wire  chosenOH_1 = enc[1]; // @[OneHot.scala 83:30]
  wire  _T = io_out_ready & io_out_valid; // @[Decoupled.scala 40:37]
  wire [1:0] _T_4 = {io_req_1_valid,io_req_0_valid}; // @[cmd101.sc 15:30]
  wire [7:0] _T_6 = chosenOH_0 ? io_req_0_bits : 8'h0; // @[Mux.scala 27:72]
  wire [7:0] _T_7 = chosenOH_1 ? io_req_1_bits : 8'h0; // @[Mux.scala 27:72]
  assign io_req_0_ready = chosenOH_0 & _T; // @[cmd101.sc 11:40]
  assign io_req_1_ready = chosenOH_1 & _T; // @[cmd101.

## Orginal ArbDemo: Testing (3/3)

In [105]:
// printVerilog(new ArbDemo(2,8))

val numPorts = 4
test(new ArbDemo(numPorts,8)) { c =>
    c.io.out.ready.poke(true.B)
    for (cycle <- 0 until 5) {
        for (p <- 0 until numPorts) {
            c.io.req(p).bits.poke(p.U)
            c.io.req(p).valid.poke((p >= cycle).B)
        }
        c.clock.step()
    }
}

Elaborating design...
Done elaborating.
req: 1111 winner:    0 (v: 1)
req: 1110 winner:    1 (v: 1)
req: 1100 winner:    2 (v: 1)
req: 1000 winner:    3 (v: 1)
req: 0000 winner:    0 (v: 0)
req: 0000 winner:    0 (v: 0)
test ArbDemo Success: 0 tests passed in 7 cycles in 0.007482 seconds 935.59 Hz


[36mnumPorts[39m: [32mInt[39m = [32m4[39m

## 8.12 MyArb ##
### MyArb (Our Own Arbiter) ###

<p>
<img src="images/myarb.svg" alt="arbiter schematic" style="width:60%;align=left"/>

## 8.13 Implement Our Own Arbiter (MyArb) (2/3) ##

In [101]:
class MyArb(numPorts: Int, w: Int) extends Module {
    val io = IO(new Bundle {
        val req = Flipped(Vec(numPorts, Decoupled(UInt(w.W))))
        val out = Decoupled(UInt(w.W))
    })
    require (numPorts > 0)
    val inValids = Wire(Vec(numPorts, Bool()))
    val inBits   = Wire(Vec(numPorts, UInt(w.W)))
    for (p <- 0 until numPorts) {
        io.req(p).ready := false.B
        inValids(p) := io.req(p).valid
        inBits(p) := io.req(p).bits
    }
    val chosenOH = PriorityEncoderOH(inValids)
    io.out.valid := inValids.asUInt.orR
    io.out.bits := Mux1H(chosenOH, inBits)
    val chosen = OHToUInt(chosenOH)
    when (io.out.fire) {
        io.req(chosen).ready := true.B
    }
}

defined [32mclass[39m [36mMyArb[39m

## 8.14 Implementing Our Own Arbiter (3/3) ##

In [102]:
class MyArb(numPorts: Int, w: Int) extends Module {
    val io = IO(new Bundle {
        val req = Flipped(Vec(numPorts, Decoupled(UInt(w.W))))
        val out = Decoupled(UInt(w.W))
    })
    require (numPorts > 0)
    val inValids = Wire(Vec(numPorts, Bool()))
    val inBits   = Wire(Vec(numPorts, UInt(w.W)))
    val chosenOH = PriorityEncoderOH(inValids)
    for (p <- 0 until numPorts) {
        io.req(p).ready := chosenOH(p) && io.out.fire
        inValids(p) := io.req(p).valid
        inBits(p) := io.req(p).bits
    }
    io.out.valid := inValids.asUInt.orR
    io.out.bits := Mux1H(chosenOH, inBits)
}

defined [32mclass[39m [36mMyArb[39m

## 8.15 Example Crossbar in Chisel

* Connects `numIns` input ports to `numOuts` output ports
  * All ports are `Decoupled`

<img src="images/xbar.svg" alt="xbar schematic" style="width:50%;align:left"/>

## 8.16 Example Crossbar Implementation (1/2)

In [22]:
class Message(numOuts: Int, length: Int) extends Bundle {
    val addr = UInt(log2Ceil(numOuts).W)
    val data = UInt(length.W)
}

class XBarIO(numIns: Int, numOuts: Int, length: Int) extends Bundle {
    val in  = Vec(numIns, Flipped(Decoupled(new Message(numOuts, length))))
    val out = Vec(numOuts, Decoupled(new Message(numOuts, length)))
}

defined [32mclass[39m [36mMessage[39m
defined [32mclass[39m [36mXBarIO[39m

## 8.17 Example Crossbar Implementation (2/2)

In [106]:
class XBar(numIns: Int, numOuts: Int, length: Int) extends Module {
    val io = IO(new XBarIO(numIns, numOuts, length))
    val arbs = Seq.fill(numOuts)(Module(new RRArbiter(new Message(numOuts, length), numIns)))
    for (ip <- 0 until numIns) {
        val inReadys = Wire(Vec(numOuts, Bool()))
        for (op <- 0 until numOuts) {
            inReadys(op) := arbs(op).io.in(ip).ready
        }
        io.in(ip).ready := inReadys(io.in(ip).bits.addr)
    }
    for (op <- 0 until numOuts) {
        for (ip <- 0 until numIns) {
            arbs(op).io.in(ip).bits <> io.in(ip).bits
            arbs(op).io.in(ip).valid := io.in(ip).valid && (io.in(ip).bits.addr === op.U)
        }
        io.out(op) <> arbs(op).io.out
    }
    for (op <- 0 until numOuts) {
        printf(" %d -> %d (%b)", io.out(op).bits.data, op.U, io.out(op).valid)
    }
    printf("\n")
}

// printVerilog(new XBar(2,1,8))

defined [32mclass[39m [36mXBar[39m

In [107]:
// Error in Verilog 
// println (getVerilog(new XBar(2,1,8)))

## Example Crossbar Demo

In [23]:
// Below code run forever.
/*
val numIns = 4
val numOuts = 2
test(new XBar(numIns,numOuts,8)) { c =>
    for (ip <- 0 until numIns) {
        c.io.in(ip).valid.poke(true.B)
        c.io.in(ip).bits.data.poke(ip.U)
        c.io.in(ip).bits.addr.poke((ip % numOuts).U)
    }
    for (op <- 0 until numOuts) {
        c.io.out(op).ready.poke(true.B)
    }
    for (cycle <- 0 until 4) {
        c.clock.step()
    }
}
*/