In [2]:
!pip3 install --user psutil xgboost cloudpickle



In [3]:
!pip3 install --user cython
!sudo make cython3

make: *** No rule to make target 'cython3'.  Stop.


In [30]:
import logging
import sys
import numpy as np
import tvm
from tvm import te
import tvm.testing

from tvm import autotvm

In [31]:
def matmul_basic(N, L, M, dtype):

    A = te.placeholder((N, L), name="A", dtype=dtype)
    B = te.placeholder((L, M), name="B", dtype=dtype)

    k = te.reduce_axis((0, L), name="k")
    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
    s = te.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    yo, yi = s[C].split(y, 8)
    xo, xi = s[C].split(x, 8)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]

In [34]:
def matmul_v1(N, L, M, dtype):
    A = te.placeholder((N, L), name="A", dtype=dtype)
    B = te.placeholder((L, M), name="B", dtype=dtype)

    k = te.reduce_axis((0, L), name="k")
    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
    s = te.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    # 2. get the config object
    cfg = autotvm.get_config()

    # 3. define search space
    cfg.define_knob("tile_y", [1, 2, 4, 8, 16])
    cfg.define_knob("tile_x", [1, 2, 4, 8, 16])

    # 4. schedule according to config
    yo, yi = s[C].split(y, cfg["tile_y"].val)
    xo, xi = s[C].split(x, cfg["tile_x"].val)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]

In [35]:
def matmul(N, L, M, dtype):
    A = te.placeholder((N, L), name="A", dtype=dtype)
    B = te.placeholder((L, M), name="B", dtype=dtype)

    k = te.reduce_axis((0, L), name="k")
    C = te.compute((N, M), lambda i, j: te.sum(A[i, k] * B[k, j], axis=k), name="C")
    s = te.create_schedule(C.op)

    # schedule
    y, x = s[C].op.axis
    k = s[C].op.reduce_axis[0]

    ##### define space begin #####
    cfg = autotvm.get_config()
    cfg.define_split("tile_y", y, num_outputs=2)
    cfg.define_split("tile_x", x, num_outputs=2)
    ##### define space end #####

    # schedule according to config
    yo, yi = cfg["tile_y"].apply(s, C, y)
    xo, xi = cfg["tile_x"].apply(s, C, x)

    s[C].reorder(yo, xo, k, yi, xi)

    return s, [A, B, C]

In [36]:
N, L, M = 512, 512, 512
task = autotvm.task.create("tutorial/matmul", args=(N, L, M, "float32"), target="llvm")
print(task.config_space)

ConfigSpace (len=100, range_length=100, space_map=
   0 tile_y: Split(policy=factors, product=512, num_outputs=2) len=10
   1 tile_x: Split(policy=factors, product=512, num_outputs=2) len=10
)


In [37]:
# logging config (for printing tuning log to the screen)
logging.getLogger("autotvm").setLevel(logging.DEBUG)
logging.getLogger("autotvm").addHandler(logging.StreamHandler(sys.stdout))

In [41]:
measure_option = autotvm.measure_option(builder="local", runner=autotvm.LocalRunner(number=5))

# Begin tuning with RandomTuner, log records to file `matmul.log`
# You can use alternatives like XGBTuner.
tuner = autotvm.tuner.RandomTuner(task)
tuner.tune(
    n_trial=10,
    measure_option=measure_option,
    callbacks=[autotvm.callback.log_to_file("matmul.log")],
)

waiting for device...
waiting for device...
waiting for device...
waiting for device...
waiting for device...


DEBUG:autotvm:waiting for device...


device available
device available
device available
device available
device available


DEBUG:autotvm:device available


Get devices for measurement successfully!
Get devices for measurement successfully!
Get devices for measurement successfully!
Get devices for measurement successfully!
Get devices for measurement successfully!


INFO:autotvm:Get devices for measurement successfully!


No: 1	GFLOPS: 8.70/8.70	result: MeasureResult(costs=(0.0308601128,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.0401132106781006, timestamp=1726491179.2314863)	[('tile_y', [-1, 256]), ('tile_x', [-1, 128])],None,78
No: 1	GFLOPS: 8.70/8.70	result: MeasureResult(costs=(0.0308601128,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.0401132106781006, timestamp=1726491179.2314863)	[('tile_y', [-1, 256]), ('tile_x', [-1, 128])],None,78
No: 1	GFLOPS: 8.70/8.70	result: MeasureResult(costs=(0.0308601128,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.0401132106781006, timestamp=1726491179.2314863)	[('tile_y', [-1, 256]), ('tile_x', [-1, 128])],None,78
No: 1	GFLOPS: 8.70/8.70	result: MeasureResult(costs=(0.0308601128,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.0401132106781006, timestamp=1726491179.2314863)	[('tile_y', [-1, 256]), ('tile_x', [-1, 128])],None,78
No: 1	GFLOPS: 8.70/8.70	result: MeasureResult(costs=(0.0308601128,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.0401132106781006, 

DEBUG:autotvm:No: 1	GFLOPS: 8.70/8.70	result: MeasureResult(costs=(0.0308601128,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.0401132106781006, timestamp=1726491179.2314863)	[('tile_y', [-1, 256]), ('tile_x', [-1, 128])],None,78


No: 2	GFLOPS: 7.81/8.70	result: MeasureResult(costs=(0.0343577214,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.9211416244506836, timestamp=1726491180.03792)	[('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
No: 2	GFLOPS: 7.81/8.70	result: MeasureResult(costs=(0.0343577214,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.9211416244506836, timestamp=1726491180.03792)	[('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
No: 2	GFLOPS: 7.81/8.70	result: MeasureResult(costs=(0.0343577214,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.9211416244506836, timestamp=1726491180.03792)	[('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
No: 2	GFLOPS: 7.81/8.70	result: MeasureResult(costs=(0.0343577214,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.9211416244506836, timestamp=1726491180.03792)	[('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62
No: 2	GFLOPS: 7.81/8.70	result: MeasureResult(costs=(0.0343577214,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.9211416244506836, timestamp=1726491180

DEBUG:autotvm:No: 2	GFLOPS: 7.81/8.70	result: MeasureResult(costs=(0.0343577214,), error_no=MeasureErrorNo.NO_ERROR, all_cost=0.9211416244506836, timestamp=1726491180.03792)	[('tile_y', [-1, 4]), ('tile_x', [-1, 64])],None,62


No: 3	GFLOPS: 1.47/8.70	result: MeasureResult(costs=(0.18276270420000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.236276388168335, timestamp=1726491183.4421086)	[('tile_y', [-1, 64]), ('tile_x', [-1, 4])],None,26
No: 3	GFLOPS: 1.47/8.70	result: MeasureResult(costs=(0.18276270420000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.236276388168335, timestamp=1726491183.4421086)	[('tile_y', [-1, 64]), ('tile_x', [-1, 4])],None,26
No: 3	GFLOPS: 1.47/8.70	result: MeasureResult(costs=(0.18276270420000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.236276388168335, timestamp=1726491183.4421086)	[('tile_y', [-1, 64]), ('tile_x', [-1, 4])],None,26
No: 3	GFLOPS: 1.47/8.70	result: MeasureResult(costs=(0.18276270420000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.236276388168335, timestamp=1726491183.4421086)	[('tile_y', [-1, 64]), ('tile_x', [-1, 4])],None,26
No: 3	GFLOPS: 1.47/8.70	result: MeasureResult(costs=(0.18276270420000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3

DEBUG:autotvm:No: 3	GFLOPS: 1.47/8.70	result: MeasureResult(costs=(0.18276270420000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.236276388168335, timestamp=1726491183.4421086)	[('tile_y', [-1, 64]), ('tile_x', [-1, 4])],None,26


No: 4	GFLOPS: 1.90/8.70	result: MeasureResult(costs=(0.1415868136,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.5538601875305176, timestamp=1726491185.9068668)	[('tile_y', [-1, 128]), ('tile_x', [-1, 16])],None,47
No: 4	GFLOPS: 1.90/8.70	result: MeasureResult(costs=(0.1415868136,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.5538601875305176, timestamp=1726491185.9068668)	[('tile_y', [-1, 128]), ('tile_x', [-1, 16])],None,47
No: 4	GFLOPS: 1.90/8.70	result: MeasureResult(costs=(0.1415868136,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.5538601875305176, timestamp=1726491185.9068668)	[('tile_y', [-1, 128]), ('tile_x', [-1, 16])],None,47
No: 4	GFLOPS: 1.90/8.70	result: MeasureResult(costs=(0.1415868136,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.5538601875305176, timestamp=1726491185.9068668)	[('tile_y', [-1, 128]), ('tile_x', [-1, 16])],None,47
No: 4	GFLOPS: 1.90/8.70	result: MeasureResult(costs=(0.1415868136,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.5538601875305176, time

DEBUG:autotvm:No: 4	GFLOPS: 1.90/8.70	result: MeasureResult(costs=(0.1415868136,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.5538601875305176, timestamp=1726491185.9068668)	[('tile_y', [-1, 128]), ('tile_x', [-1, 16])],None,47


No: 5	GFLOPS: 0.76/8.70	result: MeasureResult(costs=(0.35474130919999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.979657411575317, timestamp=1726491192.032195)	[('tile_y', [-1, 128]), ('tile_x', [-1, 2])],None,17
No: 5	GFLOPS: 0.76/8.70	result: MeasureResult(costs=(0.35474130919999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.979657411575317, timestamp=1726491192.032195)	[('tile_y', [-1, 128]), ('tile_x', [-1, 2])],None,17
No: 5	GFLOPS: 0.76/8.70	result: MeasureResult(costs=(0.35474130919999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.979657411575317, timestamp=1726491192.032195)	[('tile_y', [-1, 128]), ('tile_x', [-1, 2])],None,17
No: 5	GFLOPS: 0.76/8.70	result: MeasureResult(costs=(0.35474130919999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.979657411575317, timestamp=1726491192.032195)	[('tile_y', [-1, 128]), ('tile_x', [-1, 2])],None,17
No: 5	GFLOPS: 0.76/8.70	result: MeasureResult(costs=(0.35474130919999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5

DEBUG:autotvm:No: 5	GFLOPS: 0.76/8.70	result: MeasureResult(costs=(0.35474130919999997,), error_no=MeasureErrorNo.NO_ERROR, all_cost=5.979657411575317, timestamp=1726491192.032195)	[('tile_y', [-1, 128]), ('tile_x', [-1, 2])],None,17


No: 6	GFLOPS: 1.22/8.70	result: MeasureResult(costs=(0.2195190714,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.904259443283081, timestamp=1726491195.9349315)	[('tile_y', [-1, 1]), ('tile_x', [-1, 1])],None,0
No: 6	GFLOPS: 1.22/8.70	result: MeasureResult(costs=(0.2195190714,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.904259443283081, timestamp=1726491195.9349315)	[('tile_y', [-1, 1]), ('tile_x', [-1, 1])],None,0
No: 6	GFLOPS: 1.22/8.70	result: MeasureResult(costs=(0.2195190714,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.904259443283081, timestamp=1726491195.9349315)	[('tile_y', [-1, 1]), ('tile_x', [-1, 1])],None,0
No: 6	GFLOPS: 1.22/8.70	result: MeasureResult(costs=(0.2195190714,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.904259443283081, timestamp=1726491195.9349315)	[('tile_y', [-1, 1]), ('tile_x', [-1, 1])],None,0
No: 6	GFLOPS: 1.22/8.70	result: MeasureResult(costs=(0.2195190714,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.904259443283081, timestamp=1726491195.9349

DEBUG:autotvm:No: 6	GFLOPS: 1.22/8.70	result: MeasureResult(costs=(0.2195190714,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.904259443283081, timestamp=1726491195.9349315)	[('tile_y', [-1, 1]), ('tile_x', [-1, 1])],None,0


No: 7	GFLOPS: 1.33/8.70	result: MeasureResult(costs=(0.2023755022,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.456690788269043, timestamp=1726491199.607216)	[('tile_y', [-1, 16]), ('tile_x', [-1, 4])],None,24
No: 7	GFLOPS: 1.33/8.70	result: MeasureResult(costs=(0.2023755022,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.456690788269043, timestamp=1726491199.607216)	[('tile_y', [-1, 16]), ('tile_x', [-1, 4])],None,24
No: 7	GFLOPS: 1.33/8.70	result: MeasureResult(costs=(0.2023755022,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.456690788269043, timestamp=1726491199.607216)	[('tile_y', [-1, 16]), ('tile_x', [-1, 4])],None,24
No: 7	GFLOPS: 1.33/8.70	result: MeasureResult(costs=(0.2023755022,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.456690788269043, timestamp=1726491199.607216)	[('tile_y', [-1, 16]), ('tile_x', [-1, 4])],None,24
No: 7	GFLOPS: 1.33/8.70	result: MeasureResult(costs=(0.2023755022,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.456690788269043, timestamp=1726491199.

DEBUG:autotvm:No: 7	GFLOPS: 1.33/8.70	result: MeasureResult(costs=(0.2023755022,), error_no=MeasureErrorNo.NO_ERROR, all_cost=3.456690788269043, timestamp=1726491199.607216)	[('tile_y', [-1, 16]), ('tile_x', [-1, 4])],None,24


No: 8	GFLOPS: 2.93/8.70	result: MeasureResult(costs=(0.09154923940000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8819167613983154, timestamp=1726491201.3316388)	[('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
No: 8	GFLOPS: 2.93/8.70	result: MeasureResult(costs=(0.09154923940000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8819167613983154, timestamp=1726491201.3316388)	[('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
No: 8	GFLOPS: 2.93/8.70	result: MeasureResult(costs=(0.09154923940000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8819167613983154, timestamp=1726491201.3316388)	[('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
No: 8	GFLOPS: 2.93/8.70	result: MeasureResult(costs=(0.09154923940000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8819167613983154, timestamp=1726491201.3316388)	[('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48
No: 8	GFLOPS: 2.93/8.70	result: MeasureResult(costs=(0.09154923940000001,), error_no=MeasureErrorNo.NO_ERROR

DEBUG:autotvm:No: 8	GFLOPS: 2.93/8.70	result: MeasureResult(costs=(0.09154923940000001,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.8819167613983154, timestamp=1726491201.3316388)	[('tile_y', [-1, 256]), ('tile_x', [-1, 16])],None,48


No: 9	GFLOPS: 5.53/8.70	result: MeasureResult(costs=(0.0485434814,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.191511631011963, timestamp=1726491202.6620157)	[('tile_y', [-1, 512]), ('tile_x', [-1, 128])],None,79
No: 9	GFLOPS: 5.53/8.70	result: MeasureResult(costs=(0.0485434814,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.191511631011963, timestamp=1726491202.6620157)	[('tile_y', [-1, 512]), ('tile_x', [-1, 128])],None,79
No: 9	GFLOPS: 5.53/8.70	result: MeasureResult(costs=(0.0485434814,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.191511631011963, timestamp=1726491202.6620157)	[('tile_y', [-1, 512]), ('tile_x', [-1, 128])],None,79
No: 9	GFLOPS: 5.53/8.70	result: MeasureResult(costs=(0.0485434814,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.191511631011963, timestamp=1726491202.6620157)	[('tile_y', [-1, 512]), ('tile_x', [-1, 128])],None,79
No: 9	GFLOPS: 5.53/8.70	result: MeasureResult(costs=(0.0485434814,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.191511631011963, times

DEBUG:autotvm:No: 9	GFLOPS: 5.53/8.70	result: MeasureResult(costs=(0.0485434814,), error_no=MeasureErrorNo.NO_ERROR, all_cost=1.191511631011963, timestamp=1726491202.6620157)	[('tile_y', [-1, 512]), ('tile_x', [-1, 128])],None,79


No: 10	GFLOPS: 1.90/8.70	result: MeasureResult(costs=(0.14120382839999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.575852870941162, timestamp=1726491205.1535156)	[('tile_y', [-1, 512]), ('tile_x', [-1, 16])],None,49
No: 10	GFLOPS: 1.90/8.70	result: MeasureResult(costs=(0.14120382839999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.575852870941162, timestamp=1726491205.1535156)	[('tile_y', [-1, 512]), ('tile_x', [-1, 16])],None,49
No: 10	GFLOPS: 1.90/8.70	result: MeasureResult(costs=(0.14120382839999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.575852870941162, timestamp=1726491205.1535156)	[('tile_y', [-1, 512]), ('tile_x', [-1, 16])],None,49
No: 10	GFLOPS: 1.90/8.70	result: MeasureResult(costs=(0.14120382839999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.575852870941162, timestamp=1726491205.1535156)	[('tile_y', [-1, 512]), ('tile_x', [-1, 16])],None,49
No: 10	GFLOPS: 1.90/8.70	result: MeasureResult(costs=(0.14120382839999998,), error_no=MeasureErrorNo.NO_ERRO

DEBUG:autotvm:No: 10	GFLOPS: 1.90/8.70	result: MeasureResult(costs=(0.14120382839999998,), error_no=MeasureErrorNo.NO_ERROR, all_cost=2.575852870941162, timestamp=1726491205.1535156)	[('tile_y', [-1, 512]), ('tile_x', [-1, 16])],None,49


In [43]:
# apply history best from log file
with autotvm.apply_history_best("matmul.log"):
    with tvm.target.Target("llvm"):
        s, arg_bufs = matmul(N, L, M, "float32")
        func = tvm.build(s, arg_bufs)

# check correctness
a_np = np.random.uniform(size=(N, L)).astype(np.float32)
b_np = np.random.uniform(size=(L, M)).astype(np.float32)
c_np = a_np.dot(b_np)

c_tvm = tvm.nd.empty(c_np.shape)
func(tvm.nd.array(a_np), tvm.nd.array(b_np), c_tvm)

tvm.testing.assert_allclose(c_np, c_tvm.numpy(), rtol=1e-4)

Finish loading 65 records
Finish loading 65 records
Finish loading 65 records
Finish loading 65 records
Finish loading 65 records


DEBUG:autotvm:Finish loading 65 records
