<a href="https://colab.research.google.com/github/mgottscho/learn-xls-in-y-minutes/blob/main/learn_xls_in_y_minutes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Learn XLS in Y minutes

In the spirit of https://learnxinyminutes.com/ this notebook show the basic of [XLS toolchain](https://google.github.io/xls/) and the [DSLX language](https://google.github.io/xls/dslx_reference/) thru heavily annotated interactive examples.

In [1]:
#@title Setup {run:"auto"}
#@markdown - Click the ▷ button to setup the given version of the XLS toolchain with integration in the colab environment.

xls_version = 'v0.0.0-5770-g81b9f5599' #@param {type:"string"}
xls_colab_version = 'v0.0.0-5639-g02334a86c'

!echo '📦 downloading xls-{xls_version}'
!curl --show-error -L https://github.com/google/xls/releases/download/{xls_version}/xls-{xls_version}-linux-x64.tar.gz | tar xzf - --strip-components=1
!echo '🧪 setting up colab integration'
!python -m pip install --quiet --no-cache-dir --ignore-installed https://github.com/proppy/xls/releases/download/{xls_colab_version}/xls_colab-0.0.0-py3-none-any.whl
import xls.contrib.colab
_ = xls.contrib.colab.register_dslx_magic()

📦 downloading xls-v0.0.0-5770-g81b9f5599
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 59.8M  100 59.8M    0     0  8509k      0  0:00:07  0:00:07 --:--:--  9.7M
🧪 setting up colab integration
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.6/189.6 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
#@title {run:"auto"}
#@markdown - Click the ▷ button to  select the given PDK.

xls.contrib.colab.pdk = 'asap7' #@param ["asap7", "sky130"] {allow-input: false}

In [3]:
%%dslx --top=adder8 --pipeline_stages=1 --flop_inputs=false --flop_outputs=false
// set `adder8` ↑ as the design entrypoint and set number of pipeline stages to `1` w/ no flop inputs/outputs to generate a combinational circuit.

// define a function named `adder8` with two parameters `a` and `b` and one return value (after `->`).
fn adder8(a: u8, b: u8) -> u8 { // u8 is the type for 8-bit unsigned integers
                                // similar type like `u1`, `u2`, `u3`, `s4` (for signed 4-bit integer) are pre-defined
                                // more explicit form like `uN[5]` (equivalent to `u5`) or `bits[6]` (equivalent to `u6`) are also permitted.
  // last expression of a function denotes the return value.
  a + b
}

// define a unittest function named `adders_test` with the `#[test]` annotation.
#[test]
fn adders_test() {
  // assert function return value against the expected value.
  assert_eq(adder8(u8:41, u8:1), u8:42);
}

// the interpreter tab ↓ show the test function passing (all assert succeeded) and the verilog tab shows generated SystemVerilog.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] adders_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls_work_dir/user_module.x"

top fn __user_module__adder8(a: bits[8], b: bits[8]) -> bits[8] {
  ret add.3: bits[8] = add(a, b, id=3, pos=[(0,8,4)])
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls_work_dir/user_module.x"

top fn __user_module__adder8(a: bits[8], b: bits[8]) -> bits[8] {
  ret add.3: bits[8] = add(a, b, id=3, pos=[(0,8,4)])
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire [7:0] a,
  input wire [7:0] b,
  output wire [7:0] out
);
  // ===== Pipe stage 0:
  wire [7:0] p0_add_6_comb;
  assign p0_add_6_comb = a + b;
  assign out = p0_add_6_comb;
endmodule



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,a,0,0
0,b,0,0
0,add.3,212,212


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,8,8,2,1
MISC,INPUT_PORT,0,8,0,2
MISC,OUTPUT_PORT,8,0,1,1


<IPython.core.display.Javascript object>

In [4]:
%%dslx --top=adder8_with_carry --pipeline_stages=1 --flop_inputs=false --flop_outputs=false

fn adder8_with_carry(a: u8, b: u8) -> (u8, u1) { // returns two values of type `u8` and `u1` (as a tuple).
  // use `let` to bind the intermediate result to identifier `sum_with_carry`.
  let sum_with_carry: u9 = a as u9 + b as u9; // widen `a` and `b` to 9 bits before adding them.
  let sum = sum_with_carry[0:8]; // access a range of bits from bit `0` (inclusive) to bit `8` (exclusive).
  let carry_bit = sum_with_carry[8+:u1]; // access a range of `u1` bits (1 bit) from bit `8`.

  // return `sum` and `carry_bit` as a tuple of 2 values.
  (sum, carry_bit)
}

#[test]
fn adders_test() {
  // assert function return values when carry = 0
  assert_eq(adder8_with_carry(u8:41, u8:1), (u8:42, u1:0));
  // assert function return values when carry = 1
  assert_eq(adder8_with_carry(u8:255, u8:1), (u8:0 , u1:1));
}

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] adders_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls_work_dir/user_module.x"

top fn __user_module__adder8_with_carry(a: bits[8], b: bits[8]) -> (bits[8], bits[1]) {
  zero_ext.3: bits[9] = zero_ext(a, new_bit_count=9, id=3)
  zero_ext.4: bits[9] = zero_ext(b, new_bit_count=9, id=4)
  sum_with_carry: bits[9] = add(zero_ext.3, zero_ext.4, id=5, pos=[(0,4,35)])
  literal.7: bits[9] = literal(value=8, id=7, pos=[(0,6,33)])
  sum: bits[8] = bit_slice(sum_with_carry, start=0, width=8, id=6, pos=[(0,5,26)])
  carry_bit: bits[1] = dynamic_bit_slice(sum_with_carry, literal.7, width=1, id=8, pos=[(0,6,32)])
  ret tuple.9: (bits[8], bits[1]) = tuple(sum, carry_bit, id=9, pos=[(0,9,2)])
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls_work_dir/user_module.x"

top fn __user_module__adder8_with_carry(a: bits[8], b: bits[8]) -> (bits[8], bits[1]) {
  literal.11: bits[1] = literal(value=0, id=11)
  concat.12: bits[9] = concat(literal.11, a, id=12)
  concat.14: bits[9] = concat(literal.11, b, id=14)
  sum_with_carry: bits[9] = add(concat.12, concat.14, id=5, pos=[(0,4,35)])
  sum: bits[8] = bit_slice(sum_with_carry, start=0, width=8, id=6, pos=[(0,5,26)])
  carry_bit: bits[1] = bit_slice(sum_with_carry, start=8, width=1, id=15, pos=[(0,6,32)])
  ret tuple.9: (bits[8], bits[1]) = tuple(sum, carry_bit, id=9, pos=[(0,9,2)])
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire [7:0] a,
  input wire [7:0] b,
  output wire [8:0] out
);
  // ===== Pipe stage 0:
  wire [8:0] p0_sum_with_carry_comb;
  wire [7:0] p0_sum_comb;
  wire p0_carry_bit_comb;
  assign p0_sum_with_carry_comb = {1'h0, a} + {1'h0, b};
  assign p0_sum_comb = p0_sum_with_carry_comb[7:0];
  assign p0_carry_bit_comb = p0_sum_with_carry_comb[8];
  assign out = {p0_sum_comb, p0_carry_bit_comb};
endmodule



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,literal.11,0,0
0,a,0,0
0,literal.19,0,0
0,b,0,0
0,concat.12,0,0
0,concat.14,0,0
0,sum_with_carry,224,224
0,sum,0,224
0,carry_bit,0,224
0,tuple.9,0,224


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,9,9,2,1
INSIGNIFICANT,CONCAT,8,9,2,2
INSIGNIFICANT,LITERAL,0,1,0,2
INSIGNIFICANT,BIT_SLICE,9,1,1,1
INSIGNIFICANT,BIT_SLICE,9,8,1,1
INSIGNIFICANT,TUPLE,8,9,2,1
MISC,INPUT_PORT,0,8,0,2
MISC,OUTPUT_PORT,9,0,1,1


<IPython.core.display.Javascript object>

In [5]:
%%dslx --top=adder8_with_carry --pipeline_stages=1 --flop_inputs=false --flop_outputs=false

import std; // import the package `std` to reuse existing functionality.

fn adder8_with_carry(a: u8, b: u8) -> (u8, u1) {
  let sum_with_carry: u9 = std::uadd(a, b); // reuse `uadd` function rather than implementing our own.
  let sum = sum_with_carry[0:8];
  let carry_bit = sum_with_carry[8+:u1];
  (sum, carry_bit)
}

#[test]
fn adders_test() {
  assert_eq(adder8_with_carry(u8:41, u8:1), (u8:42, u1:0));
  assert_eq(adder8_with_carry(u8:255, u8:1), (u8:0 , u1:1));
}

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] adders_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

fn __std__uadd__8_8_9(x: bits[8], y: bits[8]) -> bits[9] {
  zero_ext.6: bits[9] = zero_ext(x, new_bit_count=9, id=6)
  zero_ext.7: bits[9] = zero_ext(y, new_bit_count=9, id=7)
  N: bits[32] = literal(value=8, id=3, pos=[(0,154,12)])
  M: bits[32] = literal(value=8, id=4, pos=[(0,154,20)])
  R: bits[32] = literal(value=9, id=5, pos=[(0,154,28)])
  ret add.8: bits[9] = add(zero_ext.6, zero_ext.7, id=8, pos=[(0,155,17)])
}

top fn __user_module__adder8_with_carry(a: bits[8], b: bits[8]) -> (bits[8], bits[1]) {
  sum_with_carry: bits[9] = invoke(a, b, to_apply=__std__uadd__8_8_9, id=11, pos=[(1,5,36)])
  literal.13: bits[9] = literal(value=8, id=13, pos=[(1,7,33)])
  sum: bits[8] = bit_slice(sum_with_carry, start=0, width=8, id=12, pos=[(1,6,26)])
  carry_bit: bits[1] = dynamic_bit_slice(sum_with_carry, literal.13, width=1, id=14, pos=[(1,7,32)])
  ret tuple.15: (bits[8], bits[1]) = tupl

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

top fn __user_module__adder8_with_carry(a: bits[8], b: bits[8]) -> (bits[8], bits[1]) {
  literal.24: bits[1] = literal(value=0, id=24, pos=[(1,5,36)])
  concat.25: bits[9] = concat(literal.24, a, id=25, pos=[(1,5,36)])
  concat.26: bits[9] = concat(literal.24, b, id=26, pos=[(1,5,36)])
  sum_with_carry: bits[9] = add(concat.25, concat.26, id=27, pos=[(0,155,17)])
  sum: bits[8] = bit_slice(sum_with_carry, start=0, width=8, id=12, pos=[(1,6,26)])
  carry_bit: bits[1] = bit_slice(sum_with_carry, start=8, width=1, id=23, pos=[(1,7,32)])
  ret tuple.15: (bits[8], bits[1]) = tuple(sum, carry_bit, id=15, pos=[(1,8,2)])
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire [7:0] a,
  input wire [7:0] b,
  output wire [8:0] out
);
  // ===== Pipe stage 0:
  wire [8:0] p0_sum_with_carry_comb;
  wire [7:0] p0_sum_comb;
  wire p0_carry_bit_comb;
  assign p0_sum_with_carry_comb = {1'h0, a} + {1'h0, b};
  assign p0_sum_comb = p0_sum_with_carry_comb[7:0];
  assign p0_carry_bit_comb = p0_sum_with_carry_comb[8];
  assign out = {p0_sum_comb, p0_carry_bit_comb};
endmodule



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,literal.24,0,0
0,a,0,0
0,literal.31,0,0
0,b,0,0
0,concat.25,0,0
0,concat.26,0,0
0,sum_with_carry,224,224
0,sum,0,224
0,carry_bit,0,224
0,tuple.15,0,224


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,9,9,2,1
INSIGNIFICANT,CONCAT,8,9,2,2
INSIGNIFICANT,LITERAL,0,1,0,2
INSIGNIFICANT,BIT_SLICE,9,1,1,1
INSIGNIFICANT,BIT_SLICE,9,8,1,1
INSIGNIFICANT,TUPLE,8,9,2,1
MISC,INPUT_PORT,0,8,0,2
MISC,OUTPUT_PORT,9,0,1,1


<IPython.core.display.Javascript object>

In [6]:
%%dslx --top=adder8_with_carry --pipeline_stages=1 --flop_inputs=false --flop_outputs=false

import std;

// generalize `adder_with_carry` with a bit-length parameter `N` of type u32.
fn adder_with_carry<N: u32>(a: uN[N], b: uN[N]) -> (uN[N], u1) { // result is N bit long, carry is 1 bit long.
  let sum_with_carry = std::uadd(a, b); // std::uadd is also parametric: result type is inferred as uN[N+1].
  let sum = sum_with_carry[0:N as s32]; // slice indexes are signed values.
  let carry_bit = sum_with_carry[N+:u1];
  (sum, carry_bit)
}

fn adder8_with_carry(a: u8, b: u8) -> (u8, u1) {
  // set bit-length parameter explicitly from a literal.
  adder_with_carry<u32:8>(a, b)
}

// define a module level constant for the bit-length.
const BIT_LENGTH = u32:4;

#[test]
fn adders_test() {
  assert_eq(adder_with_carry<BIT_LENGTH>(u4:14, u4:1), (u4:15, u1:0)); // set bit-length parameter explicitly from a constant.
  assert_eq(adder_with_carry(u8:255, u8:1), (u8:0 , u1:1)); // infer bit length parameter from arguments.
}

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] adders_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

fn __std__uadd__8_8_9(x: bits[8], y: bits[8]) -> bits[9] {
  zero_ext.6: bits[9] = zero_ext(x, new_bit_count=9, id=6)
  zero_ext.7: bits[9] = zero_ext(y, new_bit_count=9, id=7)
  N: bits[32] = literal(value=8, id=3, pos=[(0,154,12)])
  M: bits[32] = literal(value=8, id=4, pos=[(0,154,20)])
  R: bits[32] = literal(value=9, id=5, pos=[(0,154,28)])
  ret add.8: bits[9] = add(zero_ext.6, zero_ext.7, id=8, pos=[(0,155,17)])
}

fn __user_module__adder_with_carry__8(a: bits[8], b: bits[8]) -> (bits[8], bits[1]) {
  sum_with_carry: bits[9] = invoke(a, b, to_apply=__std__uadd__8_8_9, id=12, pos=[(1,6,32)])
  N: bits[32] = literal(value=8, id=11, pos=[(1,5,20)])
  sum: bits[8] = bit_slice(sum_with_carry, start=0, width=8, id=13, pos=[(1,7,26)])
  carry_bit: bits[1] = dynamic_bit_slice(sum_with_carry, N, width=1, id=14, pos=[(1,8,32)])
  ret tuple.15: (bits[8], bits[1]) = tuple(sum, carry_bit, i

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

top fn __user_module__adder8_with_carry(a: bits[8], b: bits[8]) -> (bits[8], bits[1]) {
  literal.31: bits[1] = literal(value=0, id=31, pos=[(1,6,32)])
  concat.32: bits[9] = concat(literal.31, a, id=32, pos=[(1,6,32)])
  concat.33: bits[9] = concat(literal.31, b, id=33, pos=[(1,6,32)])
  sum_with_carry: bits[9] = add(concat.32, concat.33, id=34, pos=[(0,155,17)])
  sum: bits[8] = bit_slice(sum_with_carry, start=0, width=8, id=35, pos=[(1,7,26)])
  carry_bit: bits[1] = bit_slice(sum_with_carry, start=8, width=1, id=36, pos=[(1,8,32)])
  ret tuple.37: (bits[8], bits[1]) = tuple(sum, carry_bit, id=37, pos=[(1,9,2)])
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire [7:0] a,
  input wire [7:0] b,
  output wire [8:0] out
);
  // ===== Pipe stage 0:
  wire [8:0] p0_sum_with_carry_comb;
  wire [7:0] p0_sum_comb;
  wire p0_carry_bit_comb;
  assign p0_sum_with_carry_comb = {1'h0, a} + {1'h0, b};
  assign p0_sum_comb = p0_sum_with_carry_comb[7:0];
  assign p0_carry_bit_comb = p0_sum_with_carry_comb[8];
  assign out = {p0_sum_comb, p0_carry_bit_comb};
endmodule



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,literal.31,0,0
0,a,0,0
0,literal.38,0,0
0,b,0,0
0,concat.32,0,0
0,concat.33,0,0
0,sum_with_carry,224,224
0,sum,0,224
0,carry_bit,0,224
0,tuple.37,0,224


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,9,9,2,1
INSIGNIFICANT,CONCAT,8,9,2,2
INSIGNIFICANT,LITERAL,0,1,0,2
INSIGNIFICANT,BIT_SLICE,9,1,1,1
INSIGNIFICANT,BIT_SLICE,9,8,1,1
INSIGNIFICANT,TUPLE,8,9,2,1
MISC,INPUT_PORT,0,8,0,2
MISC,OUTPUT_PORT,9,0,1,1


<IPython.core.display.Javascript object>

In [7]:
%%dslx --top=muladd8 --pipeline_stages=2
// set number of pipeline stages ↑ explicitly to `2`.

import std;

fn muladd<N: u32>(a: uN[N], b: uN[N], c: uN[N]) -> (uN[N+N], u1) {
  let product = std::umul(a, b); // std::umul is parametric: result type is inferred as uN[N*2].
  let sum_with_carry = std::uadd(product, c);
  let sum = sum_with_carry[0:N as s32 * s32:2]; // slice indexes are signed values.
  let carry_bit = sum_with_carry[N+:u1];
  (sum, carry_bit)
}

fn muladd8(a: u8, b: u8, c: u8) -> (u16, u1) {
  muladd(a, b, c)
}

#[test]
fn adders_test() {
  assert_eq(muladd8(u8:1, u8:2, u8:3), (u16:5 , u1:0));
}

// schedule tab ↓ shows two pipeline stages: with `product` in stage 0 and `sum_with_carry` in stage 1.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] adders_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

fn __std__umul__8_8_16(x: bits[8], y: bits[8]) -> bits[16] {
  zero_ext.6: bits[16] = zero_ext(x, new_bit_count=16, id=6)
  zero_ext.7: bits[16] = zero_ext(y, new_bit_count=16, id=7)
  N: bits[32] = literal(value=8, id=3, pos=[(0,179,12)])
  M: bits[32] = literal(value=8, id=4, pos=[(0,179,20)])
  R: bits[32] = literal(value=16, id=5, pos=[(0,179,28)])
  ret umul.8: bits[16] = umul(zero_ext.6, zero_ext.7, id=8, pos=[(0,180,17)])
}

fn __std__uadd__8_16_17(x: bits[16], y: bits[8]) -> bits[17] {
  zero_ext.14: bits[17] = zero_ext(x, new_bit_count=17, id=14)
  zero_ext.15: bits[17] = zero_ext(y, new_bit_count=17, id=15)
  N: bits[32] = literal(value=16, id=11, pos=[(0,154,12)])
  M: bits[32] = literal(value=8, id=12, pos=[(0,154,20)])
  R: bits[32] = literal(value=17, id=13, pos=[(0,154,28)])
  ret add.16: bits[17] = add(zero_ext.14, zero_ext.15, id=16, pos=[(0,155,17)])
}

fn __user_mod

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

top fn __user_module__muladd8(a: bits[8], b: bits[8], c: bits[8]) -> (bits[16], bits[1]) {
  literal.61: bits[1] = literal(value=0, id=61, pos=[(1,7,32)])
  product: bits[16] = umul(a, b, id=62, pos=[(0,180,17)])
  literal.63: bits[9] = literal(value=0, id=63, pos=[(1,7,32)])
  concat.64: bits[17] = concat(literal.61, product, id=64, pos=[(1,7,32)])
  concat.65: bits[17] = concat(literal.63, c, id=65, pos=[(1,7,32)])
  sum_with_carry: bits[17] = add(concat.64, concat.65, id=66, pos=[(0,155,17)])
  sum: bits[16] = bit_slice(sum_with_carry, start=0, width=16, id=67, pos=[(1,8,26)])
  carry_bit__1: bits[1] = bit_slice(sum_with_carry, start=8, width=1, id=68, pos=[(1,9,32)])
  ret tuple.69: (bits[16], bits[1]) = tuple(sum, carry_bit__1, id=69, pos=[(1,10,2)])
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire [7:0] a,
  input wire [7:0] b,
  input wire [7:0] c,
  output wire [16:0] out
);
  // lint_off MULTIPLY
  function automatic [15:0] umul16b_8b_x_8b (input reg [7:0] lhs, input reg [7:0] rhs);
    begin
      umul16b_8b_x_8b = lhs * rhs;
    end
  endfunction
  // lint_on MULTIPLY

  // ===== Pipe stage 0:

  // Registers for pipe stage 0:
  reg [7:0] p0_a;
  reg [7:0] p0_b;
  reg [7:0] p0_c;
  always_ff @ (posedge clk) begin
    p0_a <= a;
    p0_b <= b;
    p0_c <= c;
  end

  // ===== Pipe stage 1:
  wire [15:0] p1_product_comb;
  assign p1_product_comb = umul16b_8b_x_8b(p0_a, p0_b);

  // Registers for pipe stage 1:
  reg [7:0] p1_c;
  reg [15:0] p1_product;
  always_ff @ (posedge clk) begin
    p1_c <= p0_c;
    p1_product <= p1_product_comb;
  end

  // ===== Pipe stage 2:
  wire [16:0] p2_sum_with_carry_comb;
  wire [15:0] p2_sum_comb;
  wire p2_carry_bit__1_comb;
  wire [16:0] p2_tuple_91_comb;
  assign p2_sum_with_carry_comb = 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,a,0,0
0,b,0,0
0,c,0,0
0,product,716,716
1,literal.61,0,0
1,literal.63,0,0
1,concat.64,0,0
1,concat.65,0,0
1,sum_with_carry,290,290
1,sum,0,290


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,17,17,2,1
INSIGNIFICANT,BIT_SLICE,17,1,1,1
INSIGNIFICANT,BIT_SLICE,17,16,1,1
INSIGNIFICANT,CONCAT,9,17,2,1
INSIGNIFICANT,CONCAT,16,17,2,1
INSIGNIFICANT,LITERAL,0,1,0,1
INSIGNIFICANT,LITERAL,0,9,0,1
INSIGNIFICANT,TUPLE,16,17,2,1
MISC,REGISTER_READ,0,8,0,4
MISC,REGISTER_WRITE,8,0,1,4


<IPython.core.display.Javascript object>

In [8]:
%%dslx --top=muladd8 --clock_period_ps=800
// set target clock frequency to `800`ps and let XLS schedule the pipeline stages.

import std;

fn muladd<N: u32>(a: uN[N], b: uN[N], c: uN[N]) -> (uN[N+N], u1) {
  let product = std::umul(a, b);
  let sum_with_carry = std::uadd(product, c);
  let sum = sum_with_carry[0:N as s32 * s32:2];
  let carry_bit = sum_with_carry[N+:u1];
  (sum, carry_bit)
}

fn muladd8(a: u8, b: u8, c: u8) -> (u16, u1) {
  muladd(a, b, c)
}

#[test]
fn adders_test() {
  assert_eq(muladd8(u8:1, u8:2, u8:3), (u16:5 , u1:0));
}

// schedule tab ↓ shows the same 2 pipeline stages: with `product` in stage 0 and `sum_with_carry` in stage 1.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] adders_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

fn __std__umul__8_8_16(x: bits[8], y: bits[8]) -> bits[16] {
  zero_ext.6: bits[16] = zero_ext(x, new_bit_count=16, id=6)
  zero_ext.7: bits[16] = zero_ext(y, new_bit_count=16, id=7)
  N: bits[32] = literal(value=8, id=3, pos=[(0,179,12)])
  M: bits[32] = literal(value=8, id=4, pos=[(0,179,20)])
  R: bits[32] = literal(value=16, id=5, pos=[(0,179,28)])
  ret umul.8: bits[16] = umul(zero_ext.6, zero_ext.7, id=8, pos=[(0,180,17)])
}

fn __std__uadd__8_16_17(x: bits[16], y: bits[8]) -> bits[17] {
  zero_ext.14: bits[17] = zero_ext(x, new_bit_count=17, id=14)
  zero_ext.15: bits[17] = zero_ext(y, new_bit_count=17, id=15)
  N: bits[32] = literal(value=16, id=11, pos=[(0,154,12)])
  M: bits[32] = literal(value=8, id=12, pos=[(0,154,20)])
  R: bits[32] = literal(value=17, id=13, pos=[(0,154,28)])
  ret add.16: bits[17] = add(zero_ext.14, zero_ext.15, id=16, pos=[(0,155,17)])
}

fn __user_mod

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

top fn __user_module__muladd8(a: bits[8], b: bits[8], c: bits[8]) -> (bits[16], bits[1]) {
  literal.61: bits[1] = literal(value=0, id=61, pos=[(1,7,32)])
  product: bits[16] = umul(a, b, id=62, pos=[(0,180,17)])
  literal.63: bits[9] = literal(value=0, id=63, pos=[(1,7,32)])
  concat.64: bits[17] = concat(literal.61, product, id=64, pos=[(1,7,32)])
  concat.65: bits[17] = concat(literal.63, c, id=65, pos=[(1,7,32)])
  sum_with_carry: bits[17] = add(concat.64, concat.65, id=66, pos=[(0,155,17)])
  sum: bits[16] = bit_slice(sum_with_carry, start=0, width=16, id=67, pos=[(1,8,26)])
  carry_bit__1: bits[1] = bit_slice(sum_with_carry, start=8, width=1, id=68, pos=[(1,9,32)])
  ret tuple.69: (bits[16], bits[1]) = tuple(sum, carry_bit__1, id=69, pos=[(1,10,2)])
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire [7:0] a,
  input wire [7:0] b,
  input wire [7:0] c,
  output wire [16:0] out
);
  // lint_off MULTIPLY
  function automatic [15:0] umul16b_8b_x_8b (input reg [7:0] lhs, input reg [7:0] rhs);
    begin
      umul16b_8b_x_8b = lhs * rhs;
    end
  endfunction
  // lint_on MULTIPLY

  // ===== Pipe stage 0:

  // Registers for pipe stage 0:
  reg [7:0] p0_a;
  reg [7:0] p0_b;
  reg [7:0] p0_c;
  always_ff @ (posedge clk) begin
    p0_a <= a;
    p0_b <= b;
    p0_c <= c;
  end

  // ===== Pipe stage 1:
  wire [15:0] p1_product_comb;
  assign p1_product_comb = umul16b_8b_x_8b(p0_a, p0_b);

  // Registers for pipe stage 1:
  reg [7:0] p1_c;
  reg [15:0] p1_product;
  always_ff @ (posedge clk) begin
    p1_c <= p0_c;
    p1_product <= p1_product_comb;
  end

  // ===== Pipe stage 2:
  wire [16:0] p2_sum_with_carry_comb;
  wire [15:0] p2_sum_comb;
  wire p2_carry_bit__1_comb;
  wire [16:0] p2_tuple_91_comb;
  assign p2_sum_with_carry_comb = 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,a,0,0
0,b,0,0
0,c,0,0
0,product,716,716
1,literal.61,0,0
1,literal.63,0,0
1,concat.64,0,0
1,concat.65,0,0
1,sum_with_carry,290,290
1,sum,0,290


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,17,17,2,1
INSIGNIFICANT,BIT_SLICE,17,1,1,1
INSIGNIFICANT,BIT_SLICE,17,16,1,1
INSIGNIFICANT,CONCAT,9,17,2,1
INSIGNIFICANT,CONCAT,16,17,2,1
INSIGNIFICANT,LITERAL,0,1,0,1
INSIGNIFICANT,LITERAL,0,9,0,1
INSIGNIFICANT,TUPLE,16,17,2,1
MISC,REGISTER_READ,0,8,0,4
MISC,REGISTER_WRITE,8,0,1,4


<IPython.core.display.Javascript object>

In [9]:
%%dslx --top=muladd8 --clock_period_ps=1000
// set a more generous target clock frequency to `1000`ps.

import std;

fn muladd<N: u32>(a: uN[N], b: uN[N], c: uN[N]) -> (uN[N+N], u1) {
  let product = std::umul(a, b);
  let sum_with_carry = std::uadd(product, c);
  let sum = sum_with_carry[0:N as s32 * s32:2];
  let carry_bit = sum_with_carry[N+:u1];
  (sum, carry_bit)
}

fn muladd8(a: u8, b: u8, c: u8) -> (u16, u1) {
  muladd(a, b, c)
}

#[test]
fn adders_test() {
  assert_eq(muladd8(u8:1, u8:2, u8:3), (u16:5 , u1:0));
}

// schedule tab ↓ shows a single pipeline stage with `product` and `sum_with_carry`.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] adders_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

fn __std__umul__8_8_16(x: bits[8], y: bits[8]) -> bits[16] {
  zero_ext.6: bits[16] = zero_ext(x, new_bit_count=16, id=6)
  zero_ext.7: bits[16] = zero_ext(y, new_bit_count=16, id=7)
  N: bits[32] = literal(value=8, id=3, pos=[(0,179,12)])
  M: bits[32] = literal(value=8, id=4, pos=[(0,179,20)])
  R: bits[32] = literal(value=16, id=5, pos=[(0,179,28)])
  ret umul.8: bits[16] = umul(zero_ext.6, zero_ext.7, id=8, pos=[(0,180,17)])
}

fn __std__uadd__8_16_17(x: bits[16], y: bits[8]) -> bits[17] {
  zero_ext.14: bits[17] = zero_ext(x, new_bit_count=17, id=14)
  zero_ext.15: bits[17] = zero_ext(y, new_bit_count=17, id=15)
  N: bits[32] = literal(value=16, id=11, pos=[(0,154,12)])
  M: bits[32] = literal(value=8, id=12, pos=[(0,154,20)])
  R: bits[32] = literal(value=17, id=13, pos=[(0,154,28)])
  ret add.16: bits[17] = add(zero_ext.14, zero_ext.15, id=16, pos=[(0,155,17)])
}

fn __user_mod

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

top fn __user_module__muladd8(a: bits[8], b: bits[8], c: bits[8]) -> (bits[16], bits[1]) {
  literal.61: bits[1] = literal(value=0, id=61, pos=[(1,7,32)])
  product: bits[16] = umul(a, b, id=62, pos=[(0,180,17)])
  literal.63: bits[9] = literal(value=0, id=63, pos=[(1,7,32)])
  concat.64: bits[17] = concat(literal.61, product, id=64, pos=[(1,7,32)])
  concat.65: bits[17] = concat(literal.63, c, id=65, pos=[(1,7,32)])
  sum_with_carry: bits[17] = add(concat.64, concat.65, id=66, pos=[(0,155,17)])
  sum: bits[16] = bit_slice(sum_with_carry, start=0, width=16, id=67, pos=[(1,8,26)])
  carry_bit__1: bits[1] = bit_slice(sum_with_carry, start=8, width=1, id=68, pos=[(1,9,32)])
  ret tuple.69: (bits[16], bits[1]) = tuple(sum, carry_bit__1, id=69, pos=[(1,10,2)])
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire [7:0] a,
  input wire [7:0] b,
  input wire [7:0] c,
  output wire [16:0] out
);
  // lint_off MULTIPLY
  function automatic [15:0] umul16b_8b_x_8b (input reg [7:0] lhs, input reg [7:0] rhs);
    begin
      umul16b_8b_x_8b = lhs * rhs;
    end
  endfunction
  // lint_on MULTIPLY

  // ===== Pipe stage 0:

  // Registers for pipe stage 0:
  reg [7:0] p0_a;
  reg [7:0] p0_b;
  reg [7:0] p0_c;
  always_ff @ (posedge clk) begin
    p0_a <= a;
    p0_b <= b;
    p0_c <= c;
  end

  // ===== Pipe stage 1:
  wire [15:0] p1_product_comb;
  assign p1_product_comb = umul16b_8b_x_8b(p0_a, p0_b);

  // Registers for pipe stage 1:
  reg [7:0] p1_c;
  reg [15:0] p1_product;
  always_ff @ (posedge clk) begin
    p1_c <= p0_c;
    p1_product <= p1_product_comb;
  end

  // ===== Pipe stage 2:
  wire [16:0] p2_sum_with_carry_comb;
  wire [15:0] p2_sum_comb;
  wire p2_carry_bit__1_comb;
  wire [16:0] p2_tuple_91_comb;
  assign p2_sum_with_carry_comb = 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,a,0,0
0,b,0,0
0,c,0,0
0,product,716,716
1,literal.61,0,0
1,literal.63,0,0
1,concat.64,0,0
1,concat.65,0,0
1,sum_with_carry,290,290
1,sum,0,290


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,17,17,2,1
INSIGNIFICANT,BIT_SLICE,17,1,1,1
INSIGNIFICANT,BIT_SLICE,17,16,1,1
INSIGNIFICANT,CONCAT,9,17,2,1
INSIGNIFICANT,CONCAT,16,17,2,1
INSIGNIFICANT,LITERAL,0,1,0,1
INSIGNIFICANT,LITERAL,0,9,0,1
INSIGNIFICANT,TUPLE,16,17,2,1
MISC,REGISTER_READ,0,8,0,4
MISC,REGISTER_WRITE,8,0,1,4


<IPython.core.display.Javascript object>

In [10]:
%%dslx --top=muladd8 --clock_period_ps=1000
// set target clock frequency to `1000`ps and let XLS schedule the pipeline stages.

import std;

fn muladd<N: u32>(a: uN[N], b: uN[N], c: uN[N]) -> (uN[N+N], u1) {
  let product = std::umul(a, b);
  let sum_with_carry = std::uadd(product, c);
  let sum = sum_with_carry[0:N as s32 * s32:2];
  let carry_bit = sum_with_carry[N+:u1];
  (sum, carry_bit)
}

fn muladd8(a: u8, b: u8, c: u8) -> (u16, u1) {
  muladd(a, b, c)
}

#[test]
fn adders_test() {
  assert_eq(muladd8(u8:1, u8:2, u8:3), (u16:5 , u1:0));
}

// schedule tab ↓ shows the same 2 pipeline stages: with `product` in stage 0 and `sum_with_carry` in stage 1.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] adders_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

fn __std__umul__8_8_16(x: bits[8], y: bits[8]) -> bits[16] {
  zero_ext.6: bits[16] = zero_ext(x, new_bit_count=16, id=6)
  zero_ext.7: bits[16] = zero_ext(y, new_bit_count=16, id=7)
  N: bits[32] = literal(value=8, id=3, pos=[(0,179,12)])
  M: bits[32] = literal(value=8, id=4, pos=[(0,179,20)])
  R: bits[32] = literal(value=16, id=5, pos=[(0,179,28)])
  ret umul.8: bits[16] = umul(zero_ext.6, zero_ext.7, id=8, pos=[(0,180,17)])
}

fn __std__uadd__8_16_17(x: bits[16], y: bits[8]) -> bits[17] {
  zero_ext.14: bits[17] = zero_ext(x, new_bit_count=17, id=14)
  zero_ext.15: bits[17] = zero_ext(y, new_bit_count=17, id=15)
  N: bits[32] = literal(value=16, id=11, pos=[(0,154,12)])
  M: bits[32] = literal(value=8, id=12, pos=[(0,154,20)])
  R: bits[32] = literal(value=17, id=13, pos=[(0,154,28)])
  ret add.16: bits[17] = add(zero_ext.14, zero_ext.15, id=16, pos=[(0,155,17)])
}

fn __user_mod

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

top fn __user_module__muladd8(a: bits[8], b: bits[8], c: bits[8]) -> (bits[16], bits[1]) {
  literal.61: bits[1] = literal(value=0, id=61, pos=[(1,7,32)])
  product: bits[16] = umul(a, b, id=62, pos=[(0,180,17)])
  literal.63: bits[9] = literal(value=0, id=63, pos=[(1,7,32)])
  concat.64: bits[17] = concat(literal.61, product, id=64, pos=[(1,7,32)])
  concat.65: bits[17] = concat(literal.63, c, id=65, pos=[(1,7,32)])
  sum_with_carry: bits[17] = add(concat.64, concat.65, id=66, pos=[(0,155,17)])
  sum: bits[16] = bit_slice(sum_with_carry, start=0, width=16, id=67, pos=[(1,8,26)])
  carry_bit__1: bits[1] = bit_slice(sum_with_carry, start=8, width=1, id=68, pos=[(1,9,32)])
  ret tuple.69: (bits[16], bits[1]) = tuple(sum, carry_bit__1, id=69, pos=[(1,10,2)])
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire [7:0] a,
  input wire [7:0] b,
  input wire [7:0] c,
  output wire [16:0] out
);
  // lint_off MULTIPLY
  function automatic [15:0] umul16b_8b_x_8b (input reg [7:0] lhs, input reg [7:0] rhs);
    begin
      umul16b_8b_x_8b = lhs * rhs;
    end
  endfunction
  // lint_on MULTIPLY

  // ===== Pipe stage 0:

  // Registers for pipe stage 0:
  reg [7:0] p0_a;
  reg [7:0] p0_b;
  reg [7:0] p0_c;
  always_ff @ (posedge clk) begin
    p0_a <= a;
    p0_b <= b;
    p0_c <= c;
  end

  // ===== Pipe stage 1:
  wire [15:0] p1_product_comb;
  assign p1_product_comb = umul16b_8b_x_8b(p0_a, p0_b);

  // Registers for pipe stage 1:
  reg [7:0] p1_c;
  reg [15:0] p1_product;
  always_ff @ (posedge clk) begin
    p1_c <= p0_c;
    p1_product <= p1_product_comb;
  end

  // ===== Pipe stage 2:
  wire [16:0] p2_sum_with_carry_comb;
  wire [15:0] p2_sum_comb;
  wire p2_carry_bit__1_comb;
  wire [16:0] p2_tuple_91_comb;
  assign p2_sum_with_carry_comb = 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,a,0,0
0,b,0,0
0,c,0,0
0,product,716,716
1,literal.61,0,0
1,literal.63,0,0
1,concat.64,0,0
1,concat.65,0,0
1,sum_with_carry,290,290
1,sum,0,290


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,17,17,2,1
INSIGNIFICANT,BIT_SLICE,17,1,1,1
INSIGNIFICANT,BIT_SLICE,17,16,1,1
INSIGNIFICANT,CONCAT,9,17,2,1
INSIGNIFICANT,CONCAT,16,17,2,1
INSIGNIFICANT,LITERAL,0,1,0,1
INSIGNIFICANT,LITERAL,0,9,0,1
INSIGNIFICANT,TUPLE,16,17,2,1
MISC,REGISTER_READ,0,8,0,4
MISC,REGISTER_WRITE,8,0,1,4


<IPython.core.display.Javascript object>

In [11]:
%%dslx --top=muladd8 --clock_period_ps=700
// set a more aggressive target clock frequency of `700`

import std;

fn muladd<N: u32>(a: uN[N], b: uN[N], c: uN[N]) -> (uN[N+N], u1) {
  let product = std::umul(a, b);
  let sum_with_carry = std::uadd(product, c);
  let sum = sum_with_carry[0:N as s32 * s32:2];
  let carry_bit = sum_with_carry[N+:u1];
  (sum, carry_bit)
}

fn muladd8(a: u8, b: u8, c: u8) -> (u16, u1) {
  muladd(a, b, c)
}

#[test]
fn adders_test() {
  assert_eq(muladd8(u8:1, u8:2, u8:3), (u16:5 , u1:0));
}

// XLS can't schedule the pipeline stages because of of the cost the multiplier: `716ps` for this technology exceeding the target clock frequency: `700ps`.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] adders_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

fn __std__umul__8_8_16(x: bits[8], y: bits[8]) -> bits[16] {
  zero_ext.6: bits[16] = zero_ext(x, new_bit_count=16, id=6)
  zero_ext.7: bits[16] = zero_ext(y, new_bit_count=16, id=7)
  N: bits[32] = literal(value=8, id=3, pos=[(0,179,12)])
  M: bits[32] = literal(value=8, id=4, pos=[(0,179,20)])
  R: bits[32] = literal(value=16, id=5, pos=[(0,179,28)])
  ret umul.8: bits[16] = umul(zero_ext.6, zero_ext.7, id=8, pos=[(0,180,17)])
}

fn __std__uadd__8_16_17(x: bits[16], y: bits[8]) -> bits[17] {
  zero_ext.14: bits[17] = zero_ext(x, new_bit_count=17, id=14)
  zero_ext.15: bits[17] = zero_ext(y, new_bit_count=17, id=15)
  N: bits[32] = literal(value=16, id=11, pos=[(0,154,12)])
  M: bits[32] = literal(value=8, id=12, pos=[(0,154,20)])
  R: bits[32] = literal(value=17, id=13, pos=[(0,154,28)])
  ret add.16: bits[17] = add(zero_ext.14, zero_ext.15, id=16, pos=[(0,155,17)])
}

fn __user_mod

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

top fn __user_module__muladd8(a: bits[8], b: bits[8], c: bits[8]) -> (bits[16], bits[1]) {
  literal.61: bits[1] = literal(value=0, id=61, pos=[(1,7,32)])
  product: bits[16] = umul(a, b, id=62, pos=[(0,180,17)])
  literal.63: bits[9] = literal(value=0, id=63, pos=[(1,7,32)])
  concat.64: bits[17] = concat(literal.61, product, id=64, pos=[(1,7,32)])
  concat.65: bits[17] = concat(literal.63, c, id=65, pos=[(1,7,32)])
  sum_with_carry: bits[17] = add(concat.64, concat.65, id=66, pos=[(0,155,17)])
  sum: bits[16] = bit_slice(sum_with_carry, start=0, width=16, id=67, pos=[(1,8,26)])
  carry_bit__1: bits[1] = bit_slice(sum_with_carry, start=8, width=1, id=68, pos=[(1,9,32)])
  ret tuple.69: (bits[16], bits[1]) = tuple(sum, carry_bit__1, id=69, pos=[(1,10,2)])
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

xls.contrib.colab.XlsRuntimeError(b'E1210 16:38:58.264410    6785 run_pipeline_schedule.cc:472] Unable to schedule with the specified clock period (700 ps); finding the shortest feasible clock period...\nError: INVALID_ARGUMENT: cannot achieve the specified clock period. Try `--clock_period_ps=716`.; Running pass #49: Pipeline Scheduling [short: pipesched]; Running pass #49: Top level scheduling pass pipeline [short: scheduling]')

In [12]:
%%dslx --top=muladd8 --clock_period_ps=700

import std;

fn muladd<N: u32>(a: uN[N], b: uN[N], c: uN[N]) -> (uN[N], u1) {
  let (product_a, product_b) = umulp(a, b); // uses `umulp` partial multiplication to allow additional pipelining.
  let product = std::uadd(product_a, product_b);
  let product_carry = product[N+:u1];
  let sum_with_carry = std::uadd(product[0+:uN[N]], c);
  let sum = sum_with_carry[0:N as s32]; // slice indexes are signed values.
  let carry_bit = sum_with_carry[N+:u1];
  (sum, carry_bit)
}

fn muladd8(a: u8, b: u8, c: u8) -> (u8, u1) {
  muladd(a, b, c)
}

#[test]
fn adders_test() {
  assert_eq(muladd8(u8:1, u8:2, u8:3), (u8:5 , u1:0));
}

// schedule tab ↓ shows 2 pipeline stages with a shorter path_delay: `697`.

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] adders_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

fn __std__uadd__8_8_9(x: bits[8], y: bits[8]) -> bits[9] {
  zero_ext.6: bits[9] = zero_ext(x, new_bit_count=9, id=6)
  zero_ext.7: bits[9] = zero_ext(y, new_bit_count=9, id=7)
  N: bits[32] = literal(value=8, id=3, pos=[(0,154,12)])
  M: bits[32] = literal(value=8, id=4, pos=[(0,154,20)])
  R: bits[32] = literal(value=9, id=5, pos=[(0,154,28)])
  ret add.8: bits[9] = add(zero_ext.6, zero_ext.7, id=8, pos=[(0,155,17)])
}

fn __user_module__muladd__8(a: bits[8], b: bits[8], c: bits[8]) -> (bits[8], bits[1]) {
  umulp.13: (bits[8], bits[8]) = umulp(a, b, id=13)
  product_a: bits[8] = tuple_index(umulp.13, index=0, id=14, pos=[(1,5,7)])
  product_b: bits[8] = tuple_index(umulp.13, index=1, id=15, pos=[(1,5,18)])
  product: bits[9] = invoke(product_a, product_b, to_apply=__std__uadd__8_8_9, id=16, pos=[(1,6,25)])
  literal.18: bits[9] = literal(value=0, id=18, pos=[(1,8,41)])
  dynamic_bi

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

top fn __user_module__muladd8(a: bits[8], b: bits[8], c: bits[8]) -> (bits[8], bits[1]) {
  umulp.45: (bits[8], bits[8]) = umulp(a, b, id=45, pos=[(1,15,8)])
  product_a: bits[8] = tuple_index(umulp.45, index=0, id=47, pos=[(1,5,7)])
  product_b: bits[8] = tuple_index(umulp.45, index=1, id=48, pos=[(1,5,18)])
  literal.52: bits[1] = literal(value=0, id=52, pos=[(1,8,32)])
  product__1: bits[8] = add(product_a, product_b, id=62, pos=[(0,155,17)])
  concat.54: bits[9] = concat(literal.52, product__1, id=54, pos=[(1,8,32)])
  concat.55: bits[9] = concat(literal.52, c, id=55, pos=[(1,8,32)])
  sum_with_carry: bits[9] = add(concat.54, concat.55, id=56, pos=[(0,155,17)])
  sum: bits[8] = bit_slice(sum_with_carry, start=0, width=8, id=57, pos=[(1,9,26)])
  carry_bit__1: bits[1] = bit_slice(sum_with_carry, start=8, width=1, id=58, pos=[(1,10,32)])
  ret tuple.59: (bits[8], bits[1]) = tuple(su

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire [7:0] a,
  input wire [7:0] b,
  input wire [7:0] c,
  output wire [8:0] out
);
  // lint_off MULTIPLY
  function automatic [15:0] umulp8b_8b_x_8b (input reg [7:0] lhs, input reg [7:0] rhs);
    reg [7:0] result;
    begin
      result = lhs * rhs;
      umulp8b_8b_x_8b = {8'h43, result - 8'h43};
    end
  endfunction
  // lint_on MULTIPLY

  // ===== Pipe stage 0:

  // Registers for pipe stage 0:
  reg [7:0] p0_a;
  reg [7:0] p0_b;
  reg [7:0] p0_c;
  always_ff @ (posedge clk) begin
    p0_a <= a;
    p0_b <= b;
    p0_c <= c;
  end

  // ===== Pipe stage 1:
  wire [15:0] p1_umulp_79_comb;
  wire [7:0] p1_product_a_comb;
  wire [7:0] p1_product_b_comb;
  assign p1_umulp_79_comb = umulp8b_8b_x_8b(p0_a, p0_b);
  assign p1_product_a_comb = p1_umulp_79_comb[15:8];
  assign p1_product_b_comb = p1_umulp_79_comb[7:0];

  // Registers for pipe stage 1:
  reg [7:0] p1_c;
  reg [7:0] p1_product_a;
  reg [7:0] p1_product_b;
  always_ff @ (posed

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,a,0,0
0,b,0,0
0,c,0,0
0,umulp.45,697,697
0,product_a,0,697
0,product_b,0,697
1,literal.52,0,0
1,literal.69,0,0
1,concat.55,0,0
1,product__1,212,212


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,8,8,2,1
ADDER,ADD,9,9,2,1
INSIGNIFICANT,CONCAT,8,9,2,2
INSIGNIFICANT,LITERAL,0,1,0,2
INSIGNIFICANT,TUPLE_INDEX,16,8,1,2
INSIGNIFICANT,BIT_SLICE,9,1,1,1
INSIGNIFICANT,BIT_SLICE,9,8,1,1
INSIGNIFICANT,TUPLE,8,9,2,1
MISC,REGISTER_READ,0,8,0,6
MISC,REGISTER_WRITE,8,0,1,6


<IPython.core.display.Javascript object>

In [13]:
%%dslx --top=muladd_accumulate --clock_period_ps=700 --reset=reset
import std;

fn muladd<N: u32>(a: uN[N], b: uN[N], c: uN[N]) -> (u1, uN[N]) {
  let (product_a, product_b) = umulp(a, b);
  let product = std::uadd(product_a, product_b);
  let product_carry = product[N+:u1];
  let sum_with_carry = std::uadd(product[0+:uN[N]], c);
  let sum = sum_with_carry[0:N as s32];
  let carry_bit = sum_with_carry[N+:u1];
  (carry_bit, sum)
}

// define a proc named `muladd_accumulate` implementing a multiplier-accumulator
// procs express stateful sequencial logic similar always_ff block w/ register in SystemVerilog
proc muladd_accumulate {
    // define `input_a and `input_b` as 8-bits input channels
    // channels are uni-directional data fifo that allow communication between procs
    // they materialize as data bus with ready/valid signals in SystemVerilog.
    input_a: chan<u8> in;
    input_b: chan<u8> in;
    // define `output` as a 8-bits outputs channel
    output: chan<(u1, u8)> out;

    // `init` returns `0` as the initial N-bits state (the accumulator value) for the sequencial logic.
    init {
        u8:0
    }

    // `config` takes external channels as parameters and returns channel used by the sequencial logic.
    // order of the return channels has to match the proc channel definitions.
    // the proc can `recv` value from the `in` channels and `send` value to the `out` channel
    // similar to the module input and output definition in SystemVerilog.
    config(input_a: chan<u8> in, input_b: chan<u8> in, output: chan<(u1, u8)> out) {
        (input_a, input_b, output)
    }

    // `next` takes the current state (the current accumulated value) as a parameter
    // and returns the next state (the newly accumulated value).
    next(acc: u8) {
        // create a new token w/ no dependencies:
        // a token is used to synchronize and order I/O operations on the channels of the proc
        // if two I/O operations use the same token: they will happen concurrently.
        // each operation return a new `token` that can be used to sequence further operations.
        let tok = token();
        // receive one N-bits value from the input channel.
        let (tok_a, a) = recv(tok, input_a);
        // receive one N-bits value from the second input channel.
        // `tok` is used in both `recv`s so those happen concurrently.
        let (tok_b, b) = recv(tok, input_b);
        // wait for both receive operation to complete.
        let tok_c = join(tok_a, tok_b);
        // multiply `a` and `b` and add (accumulate) them to the previous `acc` value.
        let (c, n) = muladd(a, b, acc);
        // send the result and carry to the output channel.
        send(tok_c, output, (c, n));
        // return the accumulated result as the new state.
        n
    }
}

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

chan user_module__input_a(bits[8], id=0, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__input_b(bits[8], id=1, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__output((bits[1], bits[8]), id=2, kind=streaming, ops=send_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")

fn __std__uadd__8_8_9(x: bits[8], y: bits[8]) -> bits[9] {
  zero_ext.6: bits[9] = zero_ext(x, new_bit_count=9, id=6)
  zero_ext.7: bits[9] = zero_ext(y, new_bit_count=9, id=7)
  N: bits[32] = literal(value=8, id=3, pos=[(0,154,12)])
  M: bits[32] = literal(value=8, id=4, pos=[(0,154,20)])
  R: bits[32] = literal(value=9, id=5, pos=[(0,154,28)])
  ret add.8: bits[9] = add(zero_ext.6, zero_ext.7, id=8, pos=[(0,155,17)]

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

chan user_module__input_a(bits[8], id=0, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__input_b(bits[8], id=1, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__output((bits[1], bits[8]), id=2, kind=streaming, ops=send_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")

top proc __user_module__muladd_accumulate_0_next(__state: bits[8], init={0}) {
  tok: token = literal(value=token, id=27)
  receive.82: (token, bits[8]) = receive(tok, channel=user_module__input_a, id=82)
  receive.83: (token, bits[8]) = receive(tok, channel=user_module__input_b, id=83)
  a: bits[8] = tuple_index(receive.82, index=1, id=31, pos=[(1,46,20)])
  b: bits[8] = tuple_index(receive.83, index=1, id=35, pos=[(

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire reset,
  input wire [7:0] user_module__input_a_data,
  input wire user_module__input_a_valid,
  input wire [7:0] user_module__input_b_data,
  input wire user_module__input_b_valid,
  input wire user_module__output_ready,
  output wire [8:0] user_module__output_data,
  output wire user_module__output_valid,
  output wire user_module__input_a_ready,
  output wire user_module__input_b_ready
);
  // lint_off MULTIPLY
  function automatic [15:0] umulp8b_8b_x_8b (input reg [7:0] lhs, input reg [7:0] rhs);
    reg [7:0] result;
    begin
      result = lhs * rhs;
      umulp8b_8b_x_8b = {8'h43, result - 8'h43};
    end
  endfunction
  // lint_on MULTIPLY
  wire [8:0] __user_module__output_data_reg_init = {1'h0, 8'h00};
  reg [7:0] p0_product_a;
  reg [7:0] p0_product_b;
  reg [7:0] ____state;
  reg p0_valid;
  reg [7:0] __user_module__input_a_data_reg;
  reg __user_module__input_a_data_valid_reg;
  reg [7:0] __user_module__input_b_data_reg;
 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,tok,0,0
0,receive.82,0,0
0,receive.83,0,0
0,a,0,0
0,b,0,0
0,tok_a,0,0
0,tok_b,0,0
0,tok_c,0,0
0,umulp.62,697,697
0,product_a,0,697


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,8,8,2,1
ADDER,ADD,9,9,2,1
BITWISE,AND,1,1,2,6
BITWISE,NOT,1,1,1,4
BITWISE,OR,1,1,2,4
INSIGNIFICANT,CONCAT,8,9,2,2
INSIGNIFICANT,LITERAL,0,1,0,2
INSIGNIFICANT,TUPLE_INDEX,16,8,1,2
INSIGNIFICANT,BIT_SLICE,9,1,1,1
INSIGNIFICANT,BIT_SLICE,9,8,1,1


<IPython.core.display.Javascript object>

In [14]:
%%dslx --top=muladd_accumulate --clock_period_ps=700 --reset=reset
import std;

fn muladd<N: u32>(a: uN[N], b: uN[N], c: uN[N]) -> (u1, uN[N]) {
  let (product_a, product_b) = umulp(a, b);
  let product = std::uadd(product_a, product_b);
  let product_carry = product[N+:u1];
  let sum_with_carry = std::uadd(product[0+:uN[N]], c);
  let sum = sum_with_carry[0:N as s32];
  let carry_bit = sum_with_carry[N+:u1];
  (carry_bit, sum)
}

proc muladd_accumulate {
    input_a: chan<u8> in;
    input_b: chan<u8> in;
    output: chan<(u1, u8)> out;

    init {
        u8:0
    }

    config(input_a: chan<u8> in, input_b: chan<u8> in, output: chan<(u1, u8)> out) {
        (input_a, input_b, output)
    }

    next(acc: u8) {
        let tok = token();
        let (tok_a, a) = recv(tok, input_a);
        let (tok_b, b) = recv(tok, input_b);
        let tok_c = join(tok_a, tok_b);
        let (c, n) = muladd(a, b, acc);
        send(tok_c, output, (c, n));
        n
    }
}

// define a test proc to test `muladd_accumulate` proc's sequential logic.
// note: XLS does not currently generate the equivalent SystemVerilog testbed.
#[test_proc]
proc muladd_accumulate_test {
    // define channels to communicate with the proc under test
    input_a_s: chan<u8> out;
    input_b_s: chan<u8> out;
    output_r: chan<(u1, u8)> in;
    // define one output channel to terminate the test proc.
    terminator: chan<bool> out;

    init {
        () // no state
    }

    // `config` takes the `terminator` output channel as an argument.
    config(terminator: chan<bool> out) {
      // define a channel pair (sender, receiver) to communicate `input_a` values.
      let (input_a_s, input_a_r) = chan<u8>("input_a");
      // define a channel pair (sender, receiver) to communicate `input_b` values.
      let (input_b_s, input_b_r) = chan<u8>("input_b");
      // define a channel pair (sender, receiver) to communicate `output` values.
      let (output_s, output_r) = chan<(u1, u8)>("output");
      // spawn the `muladd_accumulate` with the receiving end of `input_a` and `input_b` channels (to receive inputs)
      // and the sending end of the `output` channels (to send the result).
      // this is equivalent to "instanciating" the underlying  SystemVerilog module of the corresponding proc.
      spawn muladd_accumulate(input_a_r, input_b_r, output_s);
      // return the sending end of `input_a` and `input_b` channels (to send inputs),
      // the receiving end of the `output` channels (to get the result)
      // and the terminator channel.
      (input_a_s, input_b_s, output_r, terminator)
    }

    // `next` performs the actual the test.
    next(state: ()) {
      // create a new token w/ no dependencies.
      let tok = token();
      // send `1` as the first input.
      let tok_a = send(tok, input_a_s, u8:1);
      // send `2` as the second input (concurrently).
      let tok_b = send(tok, input_b_s, u8:2);
      // wait for both send to complete (and overwrite the existing `tok` binding).
      let tok = join(tok_a, tok_b);
      // receive and assert the result value.
      let (tok, (c, n)) = recv(tok, output_r);
      assert_eq(n, u8:2);
      assert_eq(c, u1:0);

      // send `3` as the first input.
      let tok_a = send(tok, input_a_s, u8:3);
      // send `4` as the second input (concurrently).
      let tok_b = send(tok, input_b_s, u8:4);
      // wait for both send to complete (and overwrite the existing `tok` binding).
      let tok = join(tok_a, tok_b);
      // receive and assert the accumulated value.
      let (tok, (c, n)) = recv(tok, output_r);
      assert_eq(n, u8:14);
      assert_eq(c, u1:0);

      // terminate the test proc.
      send(tok, terminator, true);
    }
}

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] muladd_accumulate_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

chan user_module__input_a(bits[8], id=0, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__input_b(bits[8], id=1, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__output((bits[1], bits[8]), id=2, kind=streaming, ops=send_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")

fn __std__uadd__8_8_9(x: bits[8], y: bits[8]) -> bits[9] {
  zero_ext.6: bits[9] = zero_ext(x, new_bit_count=9, id=6)
  zero_ext.7: bits[9] = zero_ext(y, new_bit_count=9, id=7)
  N: bits[32] = literal(value=8, id=3, pos=[(0,154,12)])
  M: bits[32] = literal(value=8, id=4, pos=[(0,154,20)])
  R: bits[32] = literal(value=9, id=5, pos=[(0,154,28)])
  ret add.8: bits[9] = add(zero_ext.6, zero_ext.7, id=8, pos=[(0,155,17)]

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

chan user_module__input_a(bits[8], id=0, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__input_b(bits[8], id=1, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__output((bits[1], bits[8]), id=2, kind=streaming, ops=send_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")

top proc __user_module__muladd_accumulate_0_next(__state: bits[8], init={0}) {
  tok: token = literal(value=token, id=27)
  receive.82: (token, bits[8]) = receive(tok, channel=user_module__input_a, id=82)
  receive.83: (token, bits[8]) = receive(tok, channel=user_module__input_b, id=83)
  a: bits[8] = tuple_index(receive.82, index=1, id=31, pos=[(1,28,20)])
  b: bits[8] = tuple_index(receive.83, index=1, id=35, pos=[(

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire reset,
  input wire [7:0] user_module__input_a_data,
  input wire user_module__input_a_valid,
  input wire [7:0] user_module__input_b_data,
  input wire user_module__input_b_valid,
  input wire user_module__output_ready,
  output wire [8:0] user_module__output_data,
  output wire user_module__output_valid,
  output wire user_module__input_a_ready,
  output wire user_module__input_b_ready
);
  // lint_off MULTIPLY
  function automatic [15:0] umulp8b_8b_x_8b (input reg [7:0] lhs, input reg [7:0] rhs);
    reg [7:0] result;
    begin
      result = lhs * rhs;
      umulp8b_8b_x_8b = {8'h43, result - 8'h43};
    end
  endfunction
  // lint_on MULTIPLY
  wire [8:0] __user_module__output_data_reg_init = {1'h0, 8'h00};
  reg [7:0] p0_product_a;
  reg [7:0] p0_product_b;
  reg [7:0] ____state;
  reg p0_valid;
  reg [7:0] __user_module__input_a_data_reg;
  reg __user_module__input_a_data_valid_reg;
  reg [7:0] __user_module__input_b_data_reg;
 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,tok,0,0
0,receive.82,0,0
0,receive.83,0,0
0,a,0,0
0,b,0,0
0,tok_a,0,0
0,tok_b,0,0
0,tok_c,0,0
0,umulp.62,697,697
0,product_a,0,697


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,8,8,2,1
ADDER,ADD,9,9,2,1
BITWISE,AND,1,1,2,6
BITWISE,NOT,1,1,1,4
BITWISE,OR,1,1,2,4
INSIGNIFICANT,CONCAT,8,9,2,2
INSIGNIFICANT,LITERAL,0,1,0,2
INSIGNIFICANT,TUPLE_INDEX,16,8,1,2
INSIGNIFICANT,BIT_SLICE,9,1,1,1
INSIGNIFICANT,BIT_SLICE,9,8,1,1


<IPython.core.display.Javascript object>

In [15]:
%%dslx --top=muladd_accumulate --clock_period_ps=700 --reset=reset
import std;

fn muladd<N: u32>(a: uN[N], b: uN[N], c: uN[N]) -> (u1, uN[N]) {
  let (product_a, product_b) = umulp(a, b);
  let product = std::uadd(product_a, product_b);
  let product_carry = product[N+:u1];
  let sum_with_carry = std::uadd(product[0+:uN[N]], c);
  let sum = sum_with_carry[0:N as s32];
  let carry_bit = sum_with_carry[N+:u1];
  (carry_bit, sum)
}

proc muladd_accumulate {
    input_a: chan<u8> in;
    input_b: chan<u8> in;
    // define `reset` as a 1-bit input channel
    reset: chan<bool> in;
    output: chan<(u1, u8)> out;

    init {
        u8:0
    }

    config(input_a: chan<u8> in, input_b: chan<u8> in, reset: chan<bool> in, output: chan<(u1, u8)> out) {
        (input_a, input_b, reset, output)
    }

    next(acc: u8) {
        let tok = token();
        let (tok_a, a) = recv(tok, input_a);
        let (tok_b, b) = recv(tok, input_b);
        let tok_c = join(tok_a, tok_b);
        let (c, n) = muladd(a, b, acc);
        let tok_send = send(tok_c, output, (c, n));

        // use `recv_non_blocking` to conditionally recv from a channel w/ a default value as the last parameter.
        // returned tuple includes a `vld` flag if a new value was received from the channel.
        let (tok_reset, do_reset, do_reset_vld) = recv_non_blocking(tok, reset, false);
        if (do_reset_vld && do_reset) {
          u8:0 // return 0 as the new state (effectively reseting the accumulator).
        } else {
          n  // return the accumulated result as the new state.
        }
    }
}

#[test_proc]
proc muladd_accumulate_test {
    input_a_s: chan<u8> out;
    input_b_s: chan<u8> out;
    // define an additional channel output for sending reset.
    reset_s: chan<bool> out;
    output_r: chan<(u1, u8)> in;
    terminator: chan<bool> out;

    init {
        ()
    }

    config(terminator: chan<bool> out) {
      let (input_a_s, input_a_r) = chan<u8>("a");
      let (input_b_s, input_b_r) = chan<u8>("b");
      // define a channel pair (sender, receiver) to communicate `reset` values.
      let (reset_s, reset_r) = chan<bool>("reset");
      let (output_s, output_r) = chan<(u1, u8)>("output");

      spawn muladd_accumulate(input_a_r, input_b_r, reset_r, output_s);
      (input_a_s, input_b_s, reset_s, output_r, terminator)
    }

    next(state: ()) {
      let tok = token();
      let tok_a = send(tok, input_a_s, u8:1);
      let tok_b = send(tok, input_b_s, u8:2);
      let tok = join(tok_a, tok_b);
      let (tok, (c, n)) = recv(tok, output_r);
      assert_eq(n, u8:2);
      assert_eq(c, u1:0);

      let tok_a = send(tok, input_a_s, u8:3);
      let tok_b = send(tok, input_b_s, u8:4);
      // send a reset.
      let tok_reset = send(tok, reset_s, true);
      // wait for all sends to complete (and overwrite the existing `tok` binding).
      let tok = join(tok_a, tok_b, tok_reset);
      let (tok, (c, n)) = recv(tok, output_r);
      assert_eq(n, u8:14);
      assert_eq(c, u1:0);

      // send `5` as the first input.
      let tok_a = send(tok, input_a_s, u8:1);
      // send `6` as the second input (concurrently).
      let tok_b = send(tok, input_b_s, u8:2);
      // wait for both send to complete (and overwrite the existing `tok` binding).
      let tok = join(tok_a, tok_b);
      // receive and assert the accumulated value.
      let (tok, (c, n)) = recv(tok, output_r);
      // assert that accumulator was reset.
      assert_eq(n, u8:2);
      assert_eq(c, u1:0);

      // terminate the test proc.
      send(tok, terminator, true);
    }
}

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] muladd_accumulate_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

chan user_module__input_a(bits[8], id=0, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__input_b(bits[8], id=1, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__reset(bits[1], id=2, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__output((bits[1], bits[8]), id=3, kind=streaming, ops=send_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")

fn __std__uadd__8_8_9(x: bits[8], y: bits[8]) -> bits[9] {
  zero_ext.6: bits[9] = zero_ext(x, new_bit_count=9, id=6)
  zero_ext.7: bits[9] = zero_ext(y, new_bit_count=9, id=7)
  N: bits[32] = literal(value=8, id=3, pos=[(0,154,12)])
  M: bits[32] = literal(value=8, 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

chan user_module__input_a(bits[8], id=0, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__input_b(bits[8], id=1, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__reset(bits[1], id=2, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__output((bits[1], bits[8]), id=3, kind=streaming, ops=send_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")

top proc __user_module__muladd_accumulate_0_next(__state: bits[8], init={0}) {
  tok: token = literal(value=token, id=27)
  receive.105: (token, bits[8]) = receive(tok, channel=user_module__input_a, id=105)
  receive.106: (token, bits[8]) = receive(tok, channel=user_

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire reset,
  input wire [7:0] user_module__input_a_data,
  input wire user_module__input_a_valid,
  input wire [7:0] user_module__input_b_data,
  input wire user_module__input_b_valid,
  input wire user_module__reset_data,
  input wire user_module__reset_valid,
  input wire user_module__output_ready,
  output wire [8:0] user_module__output_data,
  output wire user_module__output_valid,
  output wire user_module__input_a_ready,
  output wire user_module__input_b_ready,
  output wire user_module__reset_ready
);
  // lint_off MULTIPLY
  function automatic [15:0] umulp8b_8b_x_8b (input reg [7:0] lhs, input reg [7:0] rhs);
    reg [7:0] result;
    begin
      result = lhs * rhs;
      umulp8b_8b_x_8b = {8'h43, result - 8'h43};
    end
  endfunction
  // lint_on MULTIPLY
  wire [8:0] __user_module__output_data_reg_init = {1'h0, 8'h00};
  reg [7:0] p0_product_a;
  reg [7:0] p0_product_b;
  reg [7:0] ____state;
  reg p0_valid;
  reg [7:0] __user_

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,tok,0,0
0,receive.105,0,0
0,receive.106,0,0
0,a,0,0
0,b,0,0
0,tok_a,0,0
0,tok_b,0,0
0,tok_c,0,0
0,umulp.85,697,697
0,product_a,0,697


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,8,8,2,1
ADDER,ADD,9,9,2,1
BITWISE,AND,1,1,2,8
BITWISE,OR,1,1,2,6
BITWISE,NOT,1,1,1,5
BITWISE,AND,1,1,3,1
BITWISE,NAND,1,1,2,1
INSIGNIFICANT,LITERAL,0,1,0,3
INSIGNIFICANT,CONCAT,8,9,2,2
INSIGNIFICANT,TUPLE_INDEX,16,8,1,2


<IPython.core.display.Javascript object>

In [16]:
%%dslx --top=muladd_accumulate --clock_period_ps=700 --reset=reset
import std;

fn muladd<N: u32>(a: uN[N], b: uN[N], c: uN[N]) -> (u1, uN[N]) {
  let (product_a, product_b) = umulp(a, b);
  let product = std::uadd(product_a, product_b);
  let product_carry = product[N+:u1];
  let sum_with_carry = std::uadd(product[0+:uN[N]], c);
  let sum = sum_with_carry[0:N as s32];
  let carry_bit = sum_with_carry[N+:u1];
  (carry_bit, sum)
}

// define `Op` enum for supporting multiple type of operations.
enum Op: u2 {
  MUL = 0,
  MUL_ACC = 1,
  RESET_ACC = 2,
}

// define `Command` struct to encapsulate op and their operands.
struct Command {
  op: Op,
  a: u8,
  b: u8,
}

proc muladd_accumulate {
    // define a single input channel.
    input: chan<Command> in;
    output: chan<(u1, u8)> out;

    init {
        u8:0
    }

    config(input: chan<Command> in, output: chan<(u1, u8)> out) {
        (input, output)
    }

    next(acc: u8) {
        // create a new token w/ no dependencies.
        let tok = token();
        // receive the current `command`.
        let (tok, command) = recv(tok, input);
        // match the command `op`s with the appropriate expression
        // and compute the result and the new accumulator value accordingly.
        // use `'` naming suffix convention for denoting a new "version" of an existing binding.
        let (c, n, acc') = match (command.op) {
          Op::MUL => {
            let (c, n) = std::umul_with_overflow<u32:8>(command.a, command.b);
            // keep current `acc` value.
            (c, n, acc)
          },
          Op::MUL_ACC => {
            let (c, n) = muladd(command.a, command.b, acc);
            // use `muladd` result  as the new `acc` value.
            (c, n, n)
          },
          Op::RESET_ACC => {
            // reset `acc` value.
            (false, u8:0, u8:0)
          },
          // catch all pattern to covering unsupported ops.
          _ => fail!("unsupported_op", (false, u8:0, u8:0))
        };
        send(tok, output, (c, n));
        // return new accumulator value as the new state.
        acc'
    }
}

#[test_proc]
proc muladd_accumulate_test {
    input_s: chan<Command> out;
    output_r: chan<(u1, u8)> in;
    terminator: chan<bool> out;

    init {
        ()
    }

    config(terminator: chan<bool> out) {
      let (input_s, input_r) = chan<Command>("input");
      let (output_s, output_r) = chan<(u1, u8)>("output");

      spawn muladd_accumulate(input_r, output_s);
      (input_s, output_r, terminator)
    }

    next(state: ()) {
      // create a new token w/ no dependencies.
      let tok = token();
      // send a `MUL` command.
      let tok = send(tok, input_s, Command{
        op: Op::MUL, a: u8:1, b: u8:2
      });
      let (tok, (c, n)) = recv(tok, output_r);
      assert_eq(n, u8:2);
      assert_eq(c, u1:0);

      // send a `MUL_ACC` command.
      let tok = send(tok, input_s, Command{
        op: Op::MUL_ACC, a: u8:1, b: u8:2
      });
      let (tok, (c, n)) = recv(tok, output_r);
      // assert that previous `Mul` operation was not accumulated.
      assert_eq(n, u8:2);
      assert_eq(c, u1:0);

      // send a `RESET_ACC` command.
      let tok = send(tok, input_s, Command{
        op: Op::RESET_ACC, a: u8:0, b: u8:0
      });
      let (tok, (c, n)) = recv(tok, output_r);
      assert_eq(n, u8:0);
      assert_eq(c, u1:0);

      // send a `MUL_ACC` command.
      let tok = send(tok, input_s, Command{
        op: Op::MUL_ACC, a: u8:1, b: u8:2
      });
      let (tok, (c, n)) = recv(tok, output_r);
      // assert that accumulator was reset.
      assert_eq(n, u8:2);
      assert_eq(c, u1:0);

      // terminate the test proc.
      send(tok, terminator, true);
    }
}

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] muladd_accumulate_test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

chan user_module__input((bits[2], bits[8], bits[8]), id=0, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__output((bits[1], bits[8]), id=1, kind=streaming, ops=send_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")

fn __std__umul__4_4_8(x: bits[4], y: bits[4]) -> bits[8] {
  zero_ext.6: bits[8] = zero_ext(x, new_bit_count=8, id=6)
  zero_ext.7: bits[8] = zero_ext(y, new_bit_count=8, id=7)
  N: bits[32] = literal(value=4, id=3, pos=[(0,179,12)])
  M: bits[32] = literal(value=4, id=4, pos=[(0,179,20)])
  R: bits[32] = literal(value=8, id=5, pos=[(0,179,28)])
  ret umul.8: bits[8] = umul(zero_ext.6, zero_ext.7, id=8, pos=[(0,180,17)])
}

fn __std__smax__32(x: bits[32], y: bits[32]) -> bits[32] {
  sgt.12: bits[1] = sgt(x, y, id=12, pos=[(0,93,56)])
  N: bits[32] = li

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

chan user_module__input((bits[2], bits[8], bits[8]), id=0, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__output((bits[1], bits[8]), id=1, kind=streaming, ops=send_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")

top proc __user_module__muladd_accumulate_0_next(__state: bits[8], init={0}) {
  tok: token = literal(value=token, id=330)
  receive.702: (token, (bits[2], bits[8], bits[8])) = receive(tok, channel=user_module__input, id=702)
  command: (bits[2], bits[8], bits[8]) = tuple_index(receive.702, index=1, id=334, pos=[(1,44,18)])
  command_a: bits[8] = tuple_index(command, index=1, id=338, pos=[(1,50,63)])
  command_b: bits[8] = tuple_index(command, index=2, id=339, pos=[(1,50,74)])
  command_a0: bits[4] = bit_slice(command_a, start=0, width=4, id=569, pos=[(

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire reset,
  input wire [17:0] user_module__input_data,
  input wire user_module__input_valid,
  input wire user_module__output_ready,
  output wire [8:0] user_module__output_data,
  output wire user_module__output_valid,
  output wire user_module__input_ready
);
  // lint_off MULTIPLY
  function automatic [7:0] umul8b_4b_x_4b (input reg [3:0] lhs, input reg [3:0] rhs);
    begin
      umul8b_4b_x_4b = lhs * rhs;
    end
  endfunction
  // lint_on MULTIPLY
  // lint_off MULTIPLY
  function automatic [15:0] umulp8b_8b_x_8b (input reg [7:0] lhs, input reg [7:0] rhs);
    reg [7:0] result;
    begin
      result = lhs * rhs;
      umulp8b_8b_x_8b = {8'h43, result - 8'h43};
    end
  endfunction
  // lint_on MULTIPLY
  wire [17:0] __user_module__input_data_reg_init = {2'h0, 8'h00, 8'h00};
  wire [8:0] __user_module__output_data_reg_init = {1'h0, 8'h00};
  reg [4:0] p0_add_795;
  reg [3:0] p0_command_a0y1_b;
  reg [7:0] p0_product_a;
  reg [7:0

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,tok,0,0
0,receive.702,0,0
0,command,0,0
0,command_a,0,0
0,command_b,0,0
0,command_a0,0,0
0,command_b0,0,0
0,command_a1,0,0
0,literal.771,0,0
0,literal.772,0,0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,5,5,2,1
ADDER,ADD,6,6,2,1
ADDER,ADD,8,8,2,1
ADDER,ADD,9,9,2,1
BITWISE,OR,1,1,2,7
BITWISE,AND,1,1,2,6
BITWISE,NOT,1,1,1,6
BITWISE,NOR,1,1,2,1
BITWISE,OR,1,1,4,1
BITWISE_REDUCTION,OR_REDUCE,4,1,1,2


<IPython.core.display.Javascript object>

# Bonus examples

## simple counter

In [17]:
%%dslx --top=counter --clock_period_ps=300 --reset=reset

proc counter {
  output: chan<u8> out;

  init {
    u8:0
  }

  config(output: chan<u8> out) {
    (output,)
  }

  next(state: u8) {
    let tok = token();
    let tok = send(tok, output, state);
    state + u8:1
  }
}

#[test_proc]
proc test {
  counter_r: chan<u8> in;
  terminator: chan<bool> out;

  init { () }

  config(terminator: chan<bool> out) {
    let (counter_s, counter_r) = chan<u8>("counter");
    spawn counter(counter_s);
    (counter_r, terminator)
  }

  next(state: ()) {
    let tok = token();
    let (tok, count) = recv(tok, counter_r);
    assert_eq(count, u8:0);
    let (tok, count) = recv(tok, counter_r);
    assert_eq(count, u8:1);
    let (tok, count) = recv(tok, counter_r);
    assert_eq(count, u8:2);
    let (tok, count) = recv(tok, counter_r);
    assert_eq(count, u8:3);
    let tok = send(tok, terminator, true);
  }
}

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

[ RUN UNITTEST  ] test
[            OK ]



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls_work_dir/user_module.x"

chan user_module__output(bits[8], id=0, kind=streaming, ops=send_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")

top proc __user_module__counter_0_next(__state: bits[8], init={0}) {
  tok: token = literal(value=token, id=4)
  literal.3: bits[1] = literal(value=1, id=3)
  literal.6: bits[8] = literal(value=1, id=6, pos=[(0,16,12)])
  __token: token = literal(value=token, id=1)
  tok__1: token = send(tok, __state, predicate=literal.3, channel=user_module__output, id=5)
  add.7: bits[8] = add(__state, literal.6, id=7, pos=[(0,16,10)])
  next (add.7)
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls_work_dir/user_module.x"

chan user_module__output(bits[8], id=0, kind=streaming, ops=send_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")

top proc __user_module__counter_0_next(__state: bits[8], init={0}) {
  literal.6: bits[8] = literal(value=1, id=6, pos=[(0,16,12)])
  tok: token = literal(value=token, id=4)
  add.7: bits[8] = add(__state, literal.6, id=7, pos=[(0,16,10)])
  tok__1: token = send(tok, __state, channel=user_module__output, id=10)
  __state_next: () = next_value(param=__state, value=add.7, id=11)
}



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire reset,
  input wire user_module__output_ready,
  output wire [7:0] user_module__output_data,
  output wire user_module__output_valid
);
  reg [7:0] ____state;
  reg [7:0] __user_module__output_data_reg;
  reg __user_module__output_data_valid_reg;
  wire user_module__output_data_valid_inv;
  wire [7:0] add_15;
  wire user_module__output_data_valid_load_en;
  wire __user_module__output_valid_buf;
  assign user_module__output_data_valid_inv = ~__user_module__output_data_valid_reg;
  assign add_15 = ____state + 8'h01;
  assign user_module__output_data_valid_load_en = user_module__output_ready | user_module__output_data_valid_inv;
  assign __user_module__output_valid_buf = 1'h1;
  always_ff @ (posedge clk) begin
    if (reset) begin
      ____state <= 8'h00;
      __user_module__output_data_reg <= 8'h00;
      __user_module__output_data_valid_reg <= 1'h0;
    end else begin
      ____state <= user_module__output_data_valid_load_en ? add_15 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,__state,0,0
0,literal.6,0,0
0,tok,0,0
0,tok__1,0,0
0,add.7,212,212
0,__state_next,0,212


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,8,8,2,1
BITWISE,NOT,1,1,1,1
BITWISE,OR,1,1,2,1
INSIGNIFICANT,LITERAL,0,1,0,1
INSIGNIFICANT,LITERAL,0,8,0,1
MISC,INPUT_PORT,0,1,0,2
MISC,REGISTER_READ,0,8,0,2
MISC,REGISTER_WRITE,8,0,3,2
MISC,OUTPUT_PORT,1,0,1,1
MISC,OUTPUT_PORT,8,0,1,1


<IPython.core.display.Javascript object>

## streaming protobuf varint decoder

In [18]:
%%dslx --top=varint_streaming_u32_decode --clock_period_ps=1500 --reset=reset

import std;

// Performs a decoding of a variable-length varint value.
//
// Returns (decoded u32, consumed bytes).
//
// See https://developers.google.com/protocol-buffers/docs/encoding#varints
fn varint_decode_u32<NUM_BYTES:u32={std::clog2(u32:32)},
                     LEN_WIDTH:u32={std::clog2(NUM_BYTES)}>(
 bytes: u8[NUM_BYTES]) -> (u32, uN[LEN_WIDTH]) {
  type LenType = uN[LEN_WIDTH];

  let (chunks, last_chunk, saw_last_chunk) =
   for (i, (chunks, last_chunk, saw_last_chunk)):
   (u32, (u7[NUM_BYTES], LenType, bool)) in u32:0..NUM_BYTES {
    let current_byte = bytes[i];
    let lsbs = current_byte as u7;
    let msb = current_byte[-1:];
    if saw_last_chunk {
      (chunks, last_chunk, saw_last_chunk)
    } else {
      (update(chunks, i, lsbs), i as LenType, !msb)
    }
  }((u7[NUM_BYTES]:[u7:0, ...], LenType:0, bool:0));

  const FLATTENED_CHUNK_BITS = NUM_BYTES * u32:7;
  type FlattenedChunkType = uN[FLATTENED_CHUNK_BITS];
  let flattened: FlattenedChunkType =
  for (i, flattened): (u32, FlattenedChunkType) in u32:0..NUM_BYTES {
    flattened | ((chunks[i]  as FlattenedChunkType) << (u32:7 * i))
  }(zero!<FlattenedChunkType>());
  const NUM_EXTRA_BITS = FLATTENED_CHUNK_BITS - u32:32;
  let msbs = (flattened >> u32:32) as uN[NUM_EXTRA_BITS];

  (flattened as u32, last_chunk + u3:1)
}

// Convenience for use with map().
fn not(x: bool) -> bool { !x }

// Convenience to statically shift byte array left, filling with 'fill'.
fn byte_array_shl<SHIFT:u32, N:u32>(bytes: u8[N], fill: u8) -> u8[N] {
  for (i, arr): (u32, u8[N]) in u32:0..N {
    update(arr, i, if i + SHIFT < N { bytes[i + SHIFT] } else { fill })
  }(u8[N]:[u8:0, ...])
}

struct State<NUM_BYTES:u32, NUM_BYTES_WIDTH:u32> {
  work_chunk: u8[NUM_BYTES],       // Chunk of bytes currently being worked on.
  len: uN[NUM_BYTES_WIDTH],        // Number of valid bytes in work_chunk
  old_bytes: u8[4],                // Leftover bytes from a previous work chunk. Guaranteed not to
                                   // have any varint terminators, else they'd already be decoded.
  old_bytes_len: u3,               // Number of valid bytes in old_bytes_len. Invalid bytes start
                                   // with index 0.
  drop_count: uN[NUM_BYTES_WIDTH], // Number of bytes to drop from work_chunk/input.
}

const INPUT_BYTES = u32:7;
const OUTPUT_WORDS = u32:3;
const BIG_SHIFT = u32:3;
const INPUT_BYTES_WIDTH = std::clog2(INPUT_BYTES + u32:1);
const OUTPUT_WORDS_WIDTH = std::clog2(OUTPUT_WORDS + u32:1);
const BIG_SHIFT_WIDTH:u32= std::clog2(BIG_SHIFT + u32:1);
const COMBINED_BYTES: u32 = INPUT_BYTES + u32:4;
const COMBINED_BYTES_WIDTH:u32 = std::clog2(COMBINED_BYTES + u32:1);

pub proc varint_streaming_u32_decode {
  bytes_in: chan<(u8[INPUT_BYTES], uN[INPUT_BYTES_WIDTH])> in;
  words_out: chan<(u32[OUTPUT_WORDS], uN[OUTPUT_WORDS_WIDTH])> out;

  config(bytes_in: chan<(u8[INPUT_BYTES], uN[INPUT_BYTES_WIDTH])> in,
         words_out: chan<(u32[OUTPUT_WORDS], uN[OUTPUT_WORDS_WIDTH])> out) {
    (bytes_in, words_out)
  }

  init {
    type MyState = State<INPUT_BYTES, INPUT_BYTES_WIDTH>;
    zero!<MyState>()
  }

  next(state: State<INPUT_BYTES, INPUT_BYTES_WIDTH>) {
    let tok = token();
    trace_fmt!("state={}", state);

    const_assert!(INPUT_BYTES >= OUTPUT_WORDS);
    const_assert!(BIG_SHIFT > u32:1 && BIG_SHIFT < INPUT_BYTES);

    type OutputWordArray = u32[OUTPUT_WORDS];
    type OutputIdx = uN[OUTPUT_WORDS_WIDTH];
    type InputIdx = uN[INPUT_BYTES_WIDTH];
    type BigShiftIdx = uN[BIG_SHIFT_WIDTH];
    type CombinedIdx = uN[COMBINED_BYTES_WIDTH];

    let terminators: bool[INPUT_BYTES] =  // terminators[i] -> word_chunk[i] terminates a varint
      map(map(state.work_chunk, std::is_unsigned_msb_set), not);
    // Remove terminators on invalid bytes.
    let terminators = for (i, terminators): (u32, bool[INPUT_BYTES]) in u32:0..INPUT_BYTES {
      if i < state.len as u32 { terminators } else { update(terminators, i, false) }
    }(terminators);
    let num_terminators = std::popcount(std::convert_to_bits_msb0(terminators)) as InputIdx;

    // Find the index of the last terminator that will be decoded in this proc iteration.
    // We can decode OUTPUT_WORDS varints per iteration, so count up to OUTPUT_WORDS and stop.
    let (_, last_terminator_idx): (OutputIdx, u32) =
    for (idx, (word_count, last_idx)): (u32, (OutputIdx, u32)) in u32:0..INPUT_BYTES {
      if terminators[idx] && word_count as u32 < OUTPUT_WORDS {
        (word_count + OutputIdx:1, idx)
      } else { (word_count, last_idx) }
    }((OutputIdx:0, u32:0));

    // Get a new input once we've processed the entire work chunk.
    let do_input = state.len == InputIdx:0;
    let (input_tok, (input_data, input_len)) = recv_if(
      tok, bytes_in, do_input, (u8[INPUT_BYTES]:[u8:0, ...], InputIdx:0));

    trace_fmt!("input_data={} input_len={}, do_input={}", input_data, input_len, do_input);

    let do_drop = state.drop_count != InputIdx:0;
    assert!(!(do_input && do_drop), "input_and_drop");

    // Each iteration, we either shift by 1 or BIG_SHIFT. Do the shifts now and select later.
    let work_chunk_shl_1 = byte_array_shl<u32:1>(state.work_chunk, u8:0);
    let work_chunk_shl_big = byte_array_shl<BIG_SHIFT>(state.work_chunk, u8:0);

    trace!(last_terminator_idx);
    trace!(num_terminators);

    // Do a big shift if we're dropping a big shift's worth of bytes or if the last terminator is
    // at or after the end of the big shift's window.
    let do_big_shift = (do_drop && state.drop_count as u32 >= BIG_SHIFT) ||
                       (!do_drop && last_terminator_idx as u32 >= BIG_SHIFT - u32:1);

    // compute next state
    let next_drop_count = if do_input {
      InputIdx:0
    } else if do_big_shift {
      InputIdx:1 + last_terminator_idx as InputIdx - BIG_SHIFT as InputIdx
    } else if do_drop {
      state.drop_count - InputIdx:1
    } else {
      last_terminator_idx as InputIdx
    };

    let next_len = if do_input {
      input_len
    } else if do_big_shift {
      state.len - BIG_SHIFT as InputIdx
    } else if do_drop {
      state.len - InputIdx:1
    } else {
      state.len - InputIdx:1
    };
    // word_chunk is either set to input, word_chunk << 1, or word_chunk << BIG_SHIFT
    let next_word_chunk = if do_input {
      input_data
     } else if do_big_shift {
      work_chunk_shl_big
     } else if do_drop {
      work_chunk_shl_1
     } else {
      work_chunk_shl_1
     };

    let (next_old_bytes, next_old_bytes_len) = if do_input {
      (state.old_bytes, state.old_bytes_len)
    } else if do_big_shift {
      (state.old_bytes, u3:0)
    } else if num_terminators == InputIdx:0 {
      let next_old_bytes_len = if state.old_bytes_len < u3:4 { state.old_bytes_len + u3:1 } else { state.old_bytes_len };
      (byte_array_shl<u32:1>(state.old_bytes, state.work_chunk[0]), next_old_bytes_len)
    } else {
      (state.old_bytes, u3:0)
    };
    trace_fmt!("next_old_bytes calc: do_input={} do_big_shift={} num_terminators={}", do_input, do_big_shift, num_terminators);
    let next_state = State {
      work_chunk: next_word_chunk,
      len: next_len,
      old_bytes: next_old_bytes,
      old_bytes_len: next_old_bytes_len,
      drop_count: next_drop_count,
    };

    // Compute and output decoded varints.
    let bytes = state.old_bytes ++ state.work_chunk;
    let bytes_len = state.len as CombinedIdx + CombinedIdx:4;

    let (output_words, num_output_words, _) =
    for (i, (output_words, num_output_words, bytes_taken)):
     (u32, (OutputWordArray, OutputIdx, CombinedIdx)) in u32:0..OUTPUT_WORDS {
      let idx = bytes_taken as u32;
      let encoded = for (j, encoded): (u32, u8[5]) in u32:0..u32:5 {
        let val = if j + idx < COMBINED_BYTES { bytes[j + idx] } else { u8: 0 };
        update(encoded, j, val)
      }(u8[5]:[u8:0, ...]);
      let (decoded, this_bytes_taken) = varint_decode_u32(encoded);
      let total_bytes_taken = bytes_taken + this_bytes_taken as CombinedIdx;
      trace_fmt!(
        "encoded={}, decoded={}, this_bytes_taken={}, total_bytes_taken={}, bytes_len={}",
         encoded, decoded, this_bytes_taken, total_bytes_taken, bytes_len);
      if total_bytes_taken <= bytes_len {
        (
          update(output_words, i, decoded),
          num_output_words + OutputIdx:1,
          bytes_taken + this_bytes_taken as CombinedIdx,
        )
      } else {
        (output_words, num_output_words, bytes_taken)
      }
    }((zero!<OutputWordArray>(), OutputIdx:0, (u3:4 - state.old_bytes_len) as CombinedIdx));

    trace_fmt!("ouput_words={}, num_output_words={}", output_words, num_output_words);

    let output_tok = send_if(
      input_tok,
      words_out,
      !do_input && !do_drop && num_output_words > OutputIdx:0,
      (output_words, num_output_words));

    next_state
  }
}

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

chan user_module__bytes_in((bits[8][7], bits[3]), id=0, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__words_out((bits[32][3], bits[2]), id=1, kind=streaming, ops=send_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")

fn __std__is_unsigned_msb_set__8(x: bits[8]) -> bits[1] {
  N: bits[32] = literal(value=8, id=2, pos=[(0,982,27)])
  ret bit_slice.3: bits[1] = bit_slice(x, start=7, width=1, id=3, pos=[(0,982,56)])
}

fn __user_module__not(x: bits[1]) -> bits[1] {
  ret not.5: bits[1] = not(x, id=5, pos=[(1,40,26)])
}

fn ____std__convert_to_bits_msb0__7_counted_for_0_body(i: bits[32], accum: bits[7], x: bits[1][7]) -> bits[7] {
  literal.10: bits[32] = literal(value=0, id=10)
  literal.13: bits[32] = literal(value=7, id=13, pos=[(0,324,28)])
  add.11: bits[32] = a

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

package user_module

file_number 0 "xls/dslx/stdlib/std.x"
file_number 1 "xls_work_dir/user_module.x"

chan user_module__bytes_in((bits[8][7], bits[3]), id=0, kind=streaming, ops=receive_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")
chan user_module__words_out((bits[32][3], bits[2]), id=1, kind=streaming, ops=send_only, flow_control=ready_valid, strictness=proven_mutually_exclusive, metadata="""""")

top proc __user_module__varint_streaming_u32_decode_0_next(__state_0: bits[8][7], __state_1: bits[3], __state_2: bits[8][4], __state_3: bits[3], __state_4: bits[3], init={[0, 0, 0, 0, 0, 0, 0], 0, [0, 0, 0, 0], 0, 0}) {
  literal.411: bits[3] = literal(value=4, id=411, pos=[(1,210,47)])
  sub.413: bits[3] = sub(literal.411, __state_3, id=413, pos=[(1,210,52)])
  acc__22_squeezed_const_msb_bits: bits[1] = literal(value=0, id=768)
  bit_slice.5196: bits[2] = bit_slice(sub.413, start=1, width=2, id=5196, pos=[(1,193,23)])
  concat.5197: bits[3] = conca

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

module user_module(
  input wire clk,
  input wire reset,
  input wire [58:0] user_module__bytes_in_data,
  input wire user_module__bytes_in_valid,
  input wire user_module__words_out_ready,
  output wire [97:0] user_module__words_out_data,
  output wire user_module__words_out_valid,
  output wire user_module__bytes_in_ready
);
  wire [7:0] ____state_2_init[4];
  assign ____state_2_init = '{8'h00, 8'h00, 8'h00, 8'h00};
  wire [7:0] ____state_0_init[7];
  assign ____state_0_init = '{8'h00, 8'h00, 8'h00, 8'h00, 8'h00, 8'h00, 8'h00};
  wire [7:0] p0_bytes_init[11];
  assign p0_bytes_init = '{8'h00, 8'h00, 8'h00, 8'h00, 8'h00, 8'h00, 8'h00, 8'h00, 8'h00, 8'h00, 8'h00};
  wire [58:0] __user_module__bytes_in_data_reg_init = {{8'h00, 8'h00, 8'h00, 8'h00, 8'h00, 8'h00, 8'h00}, 3'h0};
  wire [97:0] __user_module__words_out_data_reg_init = {{32'h0000_0000, 32'h0000_0000, 32'h0000_0000}, 2'h0};
  wire [58:0] literal_5976 = {{8'h00, 8'h00, 8'h00, 8'h00, 8'h00, 8'h00, 8'h00}, 3'h0};
  reg [2:0] ___

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

stage,node,node_delay_ps,path_delay_ps
0,literal.411,0,0
0,__state_3,0,0
0,acc__22_squeezed_const_msb_bits__10,0,0
0,acc__22_squeezed_const_msb_bits,0,0
0,literal.5697,0,0
0,__state_2,0,0
0,__state_0,0,0
0,literal.4270,0,0
0,literal.4272,0,0
0,bytes,0,0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count
kind,op,output,input,arguments,Unnamed: 5_level_1
ADDER,ADD,3,3,2,16
ADDER,ADD,2,2,2,8
ADDER,ADD,4,4,2,8
ADDER,ADD,5,5,2,2
ADDER,SUB,3,3,2,1
BITWISE,AND,1,1,2,23
BITWISE,NOT,1,1,1,21
BITWISE,OR,1,1,2,15
BITWISE,AND,8,8,2,13
BITWISE,AND,1,1,3,10


<IPython.core.display.Javascript object>