# [Schedule Primitives in TVM](https://tvm.apache.org/docs/how_to/work_with_schedules/schedule_primitives.html#sphx-glr-how-to-work-with-schedules-schedule-primitives-py)

## Create Schedule

In [None]:
from __future__ import absolute_import, print_function


import tvm
from tvm import te
import numpy as np

# Symbolic extents shared by the examples below; tensor shapes are written
# in terms of `m` and `n`.
n = te.var("n")
m = te.var("m")

def test_elewise_mul():
  """Declare an element-wise matrix multiply and print its default schedule."""
  shape = (m, n)
  lhs = te.placeholder(shape, name="A")
  rhs = te.placeholder(shape, name="B")
  # C[i, j] = A[i, j] * B[i, j]
  product = te.compute(shape, lambda i, j: lhs[i, j] * rhs[i, j], name="C")

  sched = te.create_schedule([product.op])
  # tvm.lower turns the declared computation into a real callable function.
  # With simple_mode=True it gives back a readable, C-like statement, which
  # is printed here to show the result of the schedule.
  lowered = tvm.lower(sched, [lhs, rhs, product], simple_mode=True)
  print(lowered)

# Run the example: prints the lowered element-wise multiply.
test_elewise_mul()

## SchedulePrimitives::Split

SplitFactor：将指定维度按照指定长度进行切分


In [None]:
def test_split_factor(axis, factor=32):
  """Split one axis of a 2-D copy by a given factor and print the result.

  Args:
    axis: index into B.op.axis selecting the axis to split.
    factor: value passed as the `factor` argument of split (default 32).
  """
  shape = (m, n)
  src = te.placeholder(shape, name="A")
  dst = te.compute(shape, lambda i, j: src[i, j], name="B")
  sched = te.create_schedule(dst.op)
  target = dst.op.axis[axis]
  # split returns the pair of new axes (outer, inner); they are named here
  # only for illustration and are not used further.
  outer, inner = sched[dst].split(target, factor=factor)
  print(tvm.lower(sched, [src, dst], simple_mode=True))

# Run once splitting axis 0 (i) and once splitting axis 1 (j), both with the
# default factor of 32.
test_split_factor(axis=0)
test_split_factor(axis=1)


SplitParts：将指定维度按照指定份数进行切分


In [None]:
def test_split_nparts(nparts=8):
  """Split the axis of a 1-D copy into a given number of parts and print it.

  Args:
    nparts: value passed as the `nparts` argument of split (default 8).
  """
  shape = (m,)
  src = te.placeholder(shape, name="A")
  dst = te.compute(shape, lambda i: src[i], name="B")
  sched = te.create_schedule(dst.op)
  only_axis = dst.op.axis[0]
  # The two axes returned by split are named for illustration only.
  outer, inner = sched[dst].split(only_axis, nparts=nparts)
  print(tvm.lower(sched, [src, dst], simple_mode=True))

# Split the single axis into the default 8 parts.
test_split_nparts()

## SchedulePrimitives::Tile

分块操作。注意 Tile 和 Split 是有区别的：仅靠两次 Split 无法完成一个 Tile 的功能（还需要配合 Reorder），但可以用 Tile 覆盖 Split 的功能（把不需要切分的 axis 的 factor 设为 1）。


In [None]:
def test_tile(x, y, simple_mode=True):
  """Tile a 2-D copy over both axes and print the lowered result.

  Args:
    x: tile factor for axis 0, passed to tile as `x_factor`.
    y: tile factor for axis 1, passed to tile as `y_factor`.
    simple_mode: forwarded to tvm.lower.
  """
  shape = (m, n)
  src = te.placeholder(shape, name="A")
  dst = te.compute(shape, lambda i, j: src[i, j], name="B")
  sched = te.create_schedule(dst.op)
  axis_i, axis_j = dst.op.axis[0], dst.op.axis[1]
  # tile returns four axes; they are named here for illustration only.
  i_outer, j_outer, i_inner, j_inner = sched[dst].tile(
      axis_i, axis_j, x_factor=x, y_factor=y)
  print(tvm.lower(sched, [src, dst], simple_mode=simple_mode))

# Tile with x_factor=10 and y_factor=5.
test_tile(10, 5)

比较一下 ___Split___ 和 ___Tile___ 的行为： ___Tile___ 需要 ___Split___ 和 ___Reorder___


In [None]:
def test_split_x2(x_factor, y_factor):
  """Split both axes of a 2-D copy independently and print the result.

  Unlike tile, only two separate split calls are issued here.

  Args:
    x_factor: split factor applied to axis 0.
    y_factor: split factor applied to axis 1.
  """
  shape = (m, n)
  src = te.placeholder(shape, name="A")
  dst = te.compute(shape, lambda i, j: src[i, j], name="B")
  sched = te.create_schedule(dst.op)
  stage = sched[dst]
  # First split axis 0, then axis 1; the returned axes are not used further.
  i_outer, i_inner = stage.split(dst.op.axis[0], factor=x_factor)
  j_outer, j_inner = stage.split(dst.op.axis[1], factor=y_factor)
  print(tvm.lower(sched, [src, dst], simple_mode=True))

# Print the two lowered results one after the other, each under a banner
# naming the call, so two splits can be compared with one tile using the
# same factors (10, 5).
print('*'*64)
print('test_split_x2(10, 5)')
print('*'*64)
test_split_x2(10, 5)
print('*'*64)
print('test_tile(10, 5)')
print('*'*64)
test_tile(10, 5)

比较一下 ___Split___ 和 ___Tile___ 的行为：用 ___Tile___ 来实现 ___Split___：


In [None]:
# Split axis 1 by 5, then tile with x_factor=1 and y_factor=5, printing both
# lowered results for comparison.
test_split_factor(axis=1, factor=5)
test_tile(1, 5)

## SchedulePrimitives::Fuse

合并连续的 ___N___ 个维度


In [None]:
def test_fuse(axis0, axis1, simple_mode=True):
  """Tile a 2-D copy, fuse two of the tiled axes, and print the lowered result.

  Args:
    axis0: position of the first axis to fuse within the tuple returned by
      tile.
    axis1: position of the second axis to fuse within that tuple.
    simple_mode: forwarded to tvm.lower.
  """
  A = te.placeholder((m, n), name="A")
  B = te.compute((m, n), lambda i, j: A[i, j], name="B")
  s = te.create_schedule(B.op)
  # tile to four axes first: (i.outer, j.outer, i.inner, j.inner)
  axes4 = s[B].tile(B.op.axis[0], B.op.axis[1], x_factor=10, y_factor=5)
  # then fuse the two axes selected by the caller into a single axis,
  # e.g. (1, 2) picks j.outer and i.inner
  s[B].fuse(axes4[axis0], axes4[axis1])
  print(tvm.lower(s, [A, B], simple_mode=simple_mode))

# Fuse tiled axes 1 and 2, i.e. j.outer and i.inner.
test_fuse(1, 2, True)

## SchedulePrimitives::Reorder

调换循环轴（维度）的顺序，相当于对循环嵌套做 Transpose（只改变循环的嵌套顺序，不改变数据本身的布局）。

In [None]:
def test_reorder(axis0, axis1, axis2, axis3):
  """Tile a 2-D copy, reorder the four tiled axes, and print the result.

  Each argument is a position within the tuple returned by tile; the axes
  are handed to reorder in the order axis0, axis1, axis2, axis3.
  """
  shape = (m, n)
  src = te.placeholder(shape, name="A")
  dst = te.compute(shape, lambda i, j: src[i, j], name="B")
  sched = te.create_schedule(dst.op)
  stage = sched[dst]
  # Tiling produces four axes: (i.outer, j.outer, i.inner, j.inner).
  tiled = stage.tile(dst.op.axis[0], dst.op.axis[1], x_factor=10, y_factor=5)
  new_order = [tiled[axis0], tiled[axis1], tiled[axis2], tiled[axis3]]
  stage.reorder(*new_order)
  print(tvm.lower(sched, [src, dst], simple_mode=True))

# (0, 1, 2, 3) keeps the tiled order; (2, 1, 0, 3) swaps i.outer and i.inner.
test_reorder(0, 1, 2, 3) # no change
test_reorder(2, 1, 0, 3)

## SchedulePrimitives::Gpu::Bind


In [None]:
def test_bind():
  """Split a 1-D copy by 64, bind both axes to GPU thread axes, and print it."""
  src = te.placeholder((n,), name="A")
  dst = te.compute(src.shape, lambda i: src[i], name="B")
  sched = te.create_schedule(dst.op)
  stage = sched[dst]
  outer, inner = stage.split(dst.op.axis[0], factor=64)
  # The outer axis is bound to blockIdx.x and the inner one to threadIdx.x.
  block_x = te.thread_axis("blockIdx.x")
  stage.bind(outer, block_x)
  thread_x = te.thread_axis("threadIdx.x")
  stage.bind(inner, thread_x)
  print(tvm.lower(sched, [src, dst], simple_mode=True))
# Print the lowered result with both split axes bound to thread axes.
test_bind()

## SchedulePrimitives::Compute_At

将一个 Compute Stage 移动到另一个 Stage 的指定维度（axis）内进行计算，看起来可以用在 Fusion 中。

In [None]:
def test_compute_at(axis):
  """Attach stage B at a chosen axis of stage C and print the lowered result.

  B computes A + 1 and C computes B * 2, both over an (m, n) shape.

  Args:
    axis: index into C.op.axis selecting the axis of C at which B's
      computation is placed.
  """
  A = te.placeholder((m, n), name="A")
  B = te.compute((m, n), lambda i, j: A[i, j] + 1, name="B")
  C = te.compute((m, n), lambda i, j: B[i, j] * 2, name="C")
  s = te.create_schedule(C.op)
  # move computation of B into the selected axis of computation of C
  s[B].compute_at(s[C], C.op.axis[axis])
  print(tvm.lower(s, [A, B, C], simple_mode=True))

# Attach B at C.op.axis[-1] (presumably the innermost axis, j), then at
# C.op.axis[0].
test_compute_at(-1)
test_compute_at(0)

## SchedulePrimitives::Compute_Inline

In [None]:
def test_compute_inline():
  """Mark stage B as inline in a two-stage 1-D pipeline and print the result.

  B computes A + 1 and C computes B * 2.
  """
  shape = (m,)
  src = te.placeholder(shape, name="A")
  plus_one = te.compute(shape, lambda i: src[i] + 1, name="B")
  doubled = te.compute(shape, lambda i: plus_one[i] * 2, name="C")
  sched = te.create_schedule(doubled.op)
  # Mark the intermediate stage B as inline.
  sched[plus_one].compute_inline()
  print(tvm.lower(sched, [src, plus_one, doubled], simple_mode=True))

# Print the lowered result with stage B marked inline.
test_compute_inline()

## SchedulePrimitives::Compute_Root

将一个计算移动到根上

In [None]:
def test_compute_root():
  """Attach stage B under C, move it back to the root, and print the result.

  B computes A + 1 and C computes B * 2, both over a 1-D shape (m,).
  """
  shape = (m,)
  src = te.placeholder(shape, name="A")
  plus_one = te.compute(shape, lambda i: src[i] + 1, name="B")
  doubled = te.compute(shape, lambda i: plus_one[i] * 2, name="C")
  sched = te.create_schedule(doubled.op)
  stage_b = sched[plus_one]
  # First place B at axis 0 of C ...
  stage_b.compute_at(sched[doubled], doubled.op.axis[0])
  # ... then move the computation of B to the root.
  stage_b.compute_root()
  print(tvm.lower(sched, [src, plus_one, doubled], simple_mode=True))

# Print the lowered result after compute_at followed by compute_root.
test_compute_root()