diff --git a/topi/python/topi/mali/conv2d.py b/topi/python/topi/mali/conv2d.py index d19592857086..12eb3d7c78c8 100644 --- a/topi/python/topi/mali/conv2d.py +++ b/topi/python/topi/mali/conv2d.py @@ -138,13 +138,14 @@ def _schedule_spatial_pack(cfg, s, output, conv, data_vec, kernel_vec): s[data_vec].unroll(vw) if isinstance(kernel_vec.op, tvm.te.ComputeOp) and kernel_vec.name == 'kernel_vec': + co, ci, kh, kw, vc = s[kernel_vec].op.axis if autotvm.GLOBAL_SCOPE.in_tuning: - # kernel packing will be pre-computed during compilation, so we skip - # this part to make tuning records correct - s[kernel_vec].pragma(s[kernel_vec].op.axis[0], 'debug_skip_region') + # Directly use modified data layout placeholder. + kvshape = (co // vc, ci, kh, kw, vc) + kernel_vec = tvm.te.placeholder(kvshape, kernel_vec.dtype, name="kernel") + s[kernel_vec] = kernel_vec else: max_threads = tvm.target.Target.current(allow_none=False).max_num_threads - co, ci, kh, kw, vc = s[kernel_vec].op.axis fused = s[kernel_vec].fuse(co, ci, kh, kw, vc) fused, vec = s[kernel_vec].split(fused, VC) bb, tt = s[kernel_vec].split(fused, max_threads) @@ -345,11 +346,7 @@ def _schedule_winograd(cfg, s, op): kernel, G = s[U].op.input_tensors s[G].compute_inline() eps, nu, co, ci, vco, = s[U].op.axis - if autotvm.GLOBAL_SCOPE.in_tuning: - # kernel transformation will be pre-computed during compilation, so we skip - # this part to make tuning records correct - s[U].pragma(eps, 'debug_skip_region') - else: + if not autotvm.GLOBAL_SCOPE.in_tuning: r_kh, r_kw = s[U].op.reduce_axis s[U].reorder(co, ci, eps, nu, r_kh, r_kw, vco) _ = [s[U].unroll(x) for x in [eps, nu, r_kh, r_kw]]