[CPU][SVE] Failed to legalize multi-dim constant #16784

Open
dcaballe opened this issue Mar 14, 2024 · 0 comments
Labels: codegen (Shared code generation infrastructure and dialects), codegen/llvm (LLVM code generation compiler backend)

Error:

<unknown>:0: error: failed to legalize operation 'arith.constant' that was explicitly marked illegal
<unknown>:0: note: see current operation: %2 = "arith.constant"() <{value = dense<0.000000e+00> : vector<[16]x8xf32>}> : () -> vector<[16]x8xf32>
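For reference, the op that fails to legalize is a splat-zero constant of a 2-D vector type whose leading dimension is scalable. A minimal standalone sketch of just that pattern (pulled out of the note above for illustration; the function name is made up and this is not a verified reproducer):

func.func @minimal_repro() -> vector<[16]x8xf32> {
  // 2-D splat constant with a scalable leading dimension ([16]);
  // this is the op the conversion reports as explicitly illegal.
  %cst = arith.constant dense<0.000000e+00> : vector<[16]x8xf32>
  return %cst : vector<[16]x8xf32>
}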

Repro:

iree-compile --iree-hal-target-backends=llvm-cpu --iree-input-type=stablehlo --iree-llvmcpu-target-cpu-features=+sve --iree-llvmcpu-link-embedded=false --iree-opt-data-tiling=false --iree-llvmcpu-enable-ukernels=none --iree-llvmcpu-enable-scalable-vectorization=true test.mlir -o test.vmfb

test.mlir:

hal.executable public @test {
  hal.executable.variant public @system_elf_arm_64 target(<"llvm-cpu", "system-elf-arm_64", {cpu = "", cpu_features = "+v9a,+fullfp16,+fp-armv8,+neon,+aes,+sha2,+crc,+lse,+rdm,+complxnum,+rcpc,+sha3,+sm4,+dotprod,+fp16fml,+dit,+flagm,+ssbs,+sb,+sve2-aes,+sve2-bitperm,+sve2-sha3,+sve2-sm4,+altnzcv,+fptoint,+bf16,+i8mm,+bti,+mte,+pauth,+perfmon,+predres,+spe,+ras,+sve,+sve2,+reserve-x18", data_layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", debug_symbols = false, link_embedded = false, native_vector_size = 16 : i64, target_triple = "aarch64-none-linux-android34", ukernels = "none"}>) {
    hal.executable.export public @test ordinal(0) layout(#hal.pipeline.layout<push_constants = 5, sets = [<0, bindings = [<0, storage_buffer, ReadOnly>, <1, storage_buffer, ReadOnly>, <2, storage_buffer>]>]>) attributes {hal.interface.bindings = [#hal.interface.binding<0, 0>, #hal.interface.binding<0, 1>, #hal.interface.binding<0, 2>]} {
    ^bb0(%arg0: !hal.device):
      %x, %y, %z = flow.dispatch.workgroup_count_from_slice
      hal.return %x, %y, %z : index, index, index
    }
    builtin.module {
      func.func @test() {
        %c0_i32 = arith.constant 0 : i32
        %0 = hal.interface.constant.load[0] : i32
        %1 = hal.interface.constant.load[1] : i32
        %2 = hal.interface.constant.load[2] : i32
        %3 = hal.interface.constant.load[3] : i32
        %4 = hal.interface.constant.load[4] : i32
        %5 = arith.index_castui %0 : i32 to index
        %6 = arith.index_castui %1 : i32 to index
        %7 = arith.index_castui %2 : i32 to index
        %8 = arith.index_castui %3 : i32 to index
        %9 = arith.index_castui %4 : i32 to index
        %10 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%5) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128x256xi8>>
        %11 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%7) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256x256xi8>>
        %12 = hal.interface.binding.subspan set(0) binding(0) type(storage_buffer) alignment(64) offset(%6) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<128xf32>>
        %13 = hal.interface.binding.subspan set(0) binding(1) type(storage_buffer) alignment(64) offset(%8) flags(ReadOnly) : !flow.dispatch.tensor<readonly:tensor<256xf32>>
        %14 = hal.interface.binding.subspan set(0) binding(2) type(storage_buffer) alignment(64) offset(%9) : !flow.dispatch.tensor<writeonly:tensor<128x256xf32>>
        %15 = flow.dispatch.tensor.load %10, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xi8>> -> tensor<128x256xi8>
        %16 = flow.dispatch.tensor.load %11, offsets = [0, 0], sizes = [256, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x256xi8>> -> tensor<256x256xi8>
        %17 = flow.dispatch.tensor.load %12, offsets = [0], sizes = [128], strides = [1] : !flow.dispatch.tensor<readonly:tensor<128xf32>> -> tensor<128xf32>
        %18 = flow.dispatch.tensor.load %13, offsets = [0], sizes = [256], strides = [1] : !flow.dispatch.tensor<readonly:tensor<256xf32>> -> tensor<256xf32>
        %19 = tensor.empty() : tensor<128x256xf32>
        %20 = tensor.empty() : tensor<128x256xi32>
        %21 = linalg.fill ins(%c0_i32 : i32) outs(%20 : tensor<128x256xi32>) -> tensor<128x256xi32>
        %22 = linalg.matmul ins(%15, %16 : tensor<128x256xi8>, tensor<256x256xi8>) outs(%21 : tensor<128x256xi32>) -> tensor<128x256xi32>
        %23 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%22, %17, %18 : tensor<128x256xi32>, tensor<128xf32>, tensor<256xf32>) outs(%19 : tensor<128x256xf32>) {
        ^bb0(%in: i32, %in_0: f32, %in_1: f32, %out: f32):
          %24 = arith.sitofp %in : i32 to f32
          %25 = arith.mulf %24, %in_0 : f32
          %26 = arith.mulf %25, %in_1 : f32
          linalg.yield %26 : f32
        } -> tensor<128x256xf32>
        flow.dispatch.tensor.store %23, %14, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : tensor<128x256xf32> -> !flow.dispatch.tensor<writeonly:tensor<128x256xf32>>
        return
      }
    }
  }
}