In [1]:
import logging
import sys
# Route log records to stdout so they show up inline with each cell's output.
# The lowered representation printed during the build is emitted at DEBUG level.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# Set the root logger level explicitly as well, in addition to basicConfig().
logging.root.setLevel(logging.DEBUG)

In [2]:
import nnvm.compiler
import nnvm.symbol as sym

Let's create a simple neural-network-like graph.

In [3]:
# Symbolic placeholders: the input plus one weight/bias pair shared by both convolutions.
x = sym.Variable("x")
w = sym.Variable("w")
b = sym.Variable("b")

# First convolution: 5x5 kernel, no padding.
conv_first = sym.conv2d(data=x, weight=w, bias=b, channels=3, kernel_size=(5,5))
# Second convolution consumes the first one's output and reuses the same w and b;
# it is padded by [2, 2] so that its result can be added to its own input.
conv_second = sym.conv2d(data=conv_first, weight=w, bias=b, channels=3, kernel_size=(5,5), padding=[2,2])
# Residual-style sum of the two convolution outputs.
z = conv_first + conv_second

compute_graph = nnvm.graph.create(z)

We can print the graph in human-readable form with the `ir` method

In [4]:
# Dump the textual IR of the graph built above.
print(compute_graph.ir())

Graph(%x, %w, %b) {
  %3 = conv2d(%x, %w, %b, kernel_size='(5, 5)', channels='3')
  %4 = conv2d(%3, %w, %b, channels='3', padding='[2, 2]', kernel_size='(5, 5)')
  %5 = broadcast_add(%3, %4)
  ret %5
}


Before building the graph, let's perform an ugly trick and modify the function `Graph.apply`, which applies a list of passes to a graph, so that it prints the applied passes and the resulting graph. This will help us figure out what the build process consists of.

In [5]:
# Remember the original, unpatched Graph.apply exactly once.
# If this cell is re-run after Graph.apply has already been replaced by the
# tracing wrapper, an unconditional assignment would capture the wrapper itself,
# and the wrapper would then call itself recursively without end.
if "old_graph_apply" not in globals():
    old_graph_apply = nnvm.graph.Graph.apply

In [6]:
def my_modified_graph_apply(self, passes):
    """Tracing replacement for ``Graph.apply``.

    Delegates to the saved original ``old_graph_apply`` and then prints which
    passes were applied together with the IR of the resulting graph.

    Parameters
    ----------
    self : the graph the passes are applied to.
    passes : a pass name or a list of pass names, forwarded unchanged.

    Returns
    -------
    Whatever the original ``apply`` returned.
    """
    result = old_graph_apply(self, passes)

    # Printing the IR is itself implemented as a pass ("PrintGraphIR"), so
    # tracing that pass would make the ir() call below re-enter this function
    # forever. Stay silent for it.
    if passes in (["PrintGraphIR"], "PrintGraphIR"):
        return result

    print("Applied passes " + str(passes))
    print(result.ir())
    return result

# Monkey-patch: from here on every Graph.apply call goes through the tracing wrapper.
nnvm.graph.Graph.apply = my_modified_graph_apply

Now let's call the build function

In [7]:
# Only the input "x" gets an explicit shape here; shapes for "w" and "b" are not
# supplied (presumably they are derived by shape inference - confirm).
input_shapes = {"x": (7,3,11,13)}

deploy_graph, lib, params = nnvm.compiler.build(
    compute_graph,
    target="llvm",
    shape=input_shapes,
    dtype="float32",
)

Applied passes SaveJSON
Graph() {
  ret 
}
graph_attr_keys = [json]

Applied passes CorrectLayout
Graph(%x, %w, %b) {
  %3 = conv2d(%x, %w, %b, kernel_size='(5, 5)', channels='3')
  %4 = conv2d(%3, %w, %b, channels='3', padding='[2, 2]', kernel_size='(5, 5)')
  %5 = broadcast_add(%3, %4)
  ret %5
}
graph_attr_keys = [layout]

Applied passes SaveJSON
Graph() {
  ret 
}
graph_attr_keys = [json]

Applied passes InferShape
Graph(%x, %w, %b) {
  %3 = conv2d(%x, %w, %b, kernel_size='(5, 5)', channels='3')
  %4 = conv2d(%3, %w, %b, channels='3', padding='[2, 2]', kernel_size='(5, 5)')
  %5 = broadcast_add(%3, %4)
  ret %5
}
graph_attr_keys = [shape_num_unknown_nodes, layout, shape]

Applied passes SaveJSON
Graph() {
  ret 
}
graph_attr_keys = [json]

Applied passes ['InferShape', 'SimplifyInference']
Graph(%x, %w, %b) {
  %3 = conv2d(%x, %w, %b, kernel_size='(5, 5)', channels='3')
  %4 = conv2d(%3, %w, %b, channels='3', padding='[2, 2]', kernel_size='(5, 5)')
  %5 = broadcast_add(%3, %4)
  re

DEBUG:root:lower function fuse_conv2d_broadcast_add
DEBUG:root:// attr [pad_temp] storage_scope = "global"
allocate pad_temp[float32 * 7 * 3 * 11 * 13]
// attr [compute] storage_scope = "global"
allocate compute[float32 * 7 * 3 * 7 * 9]
produce pad_temp {
  parallel (i0.i1.fused, 0, 21) {
    for (i2, 0, 11) {
      for (i3, 0, 13) {
        pad_temp[((((i0.i1.fused*11) + i2)*13) + i3)] = tvm_if_then_else(((((2 <= i2) && (i2 < 9)) && (2 <= i3)) && (i3 < 11)), input0[(((((i0.i1.fused*7) + i2)*9) + i3) + -20)], 0.000000f)
      }
    }
  }
}
produce compute {
  parallel (nn.ff.fused, 0, 21) {
    for (yy.init, 0, 7) {
      for (xx.inner.init.s, 0, 16) {
        if (likely((xx.inner.init.s < 9))) {
          compute[((((nn.ff.fused*7) + yy.init)*9) + xx.inner.init.s)] = 0.000000f
        }
      }
    }
    for (rc, 0, 3) {
      for (yy, 0, 7) {
        for (xx.inner.s, 0, 16) {
          if (likely((xx.inner.s < 9))) {
            compute[((((nn.ff.fused*7) + yy)*9) + xx.inner.s)] = (c

Applied passes GraphFuseCompile
Graph(%x, %w, %b) {
  %3 = tvm_op(%x, %w, %b, num_outputs='1', num_inputs='3', flatten_data='0', func_name='fuse_conv2d')
  %4 = tvm_op(%3, %w, %b, num_outputs='1', num_inputs='3', flatten_data='0', func_name='fuse_conv2d_broadcast_add')
  ret %4
}
graph_attr_keys = [storage_id, dtype, dltype, shape, module]



There is a lot of output. First the graph is transformed by some passes. Then, in the `GraphFuseCompile` pass, lowering is performed. If you look at the source code, you'll see that it calls some function `"nnvm.compiler.lower"` dynamically by its name. It's actually called `_lower` and it can be found in `nnvm/python/nnvm/compiler/build_module.py`. It calls `tvm.lower`, which does stuff (here is the boundary between nnvm and tvm!) and, if the logging level is debug, it also prints out some lowered representation (not exactly the one actually used, though).
