diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp index dccc027913b8c..e7537ba1f0a79 100644 --- a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp +++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp @@ -99,26 +99,22 @@ std::string mangle(StringRef baseName, ArrayRef types, static int32_t getL1CacheControl(LoadCacheControl cc) { int32_t control = 0; switch (cc) { - case LoadCacheControl::L1UC_L2UC_L3UC: - case LoadCacheControl::L1UC_L2UC_L3C: - case LoadCacheControl::L1UC_L2C_L3UC: - case LoadCacheControl::L1UC_L2C_L3C: - control = 1; - break; case LoadCacheControl::L1C_L2UC_L3UC: case LoadCacheControl::L1C_L2UC_L3C: case LoadCacheControl::L1C_L2C_L3UC: case LoadCacheControl::L1C_L2C_L3C: - control = 2; + control = 1; break; case LoadCacheControl::L1S_L2UC_L3UC: case LoadCacheControl::L1S_L2UC_L3C: case LoadCacheControl::L1S_L2C_L3UC: case LoadCacheControl::L1S_L2C_L3C: - control = 3; + control = 2; break; case LoadCacheControl::INVALIDATE_READ: - control = 4; + control = 3; + break; + default: break; } return control; @@ -127,16 +123,15 @@ static int32_t getL1CacheControl(LoadCacheControl cc) { static int32_t getL1CacheControl(StoreCacheControl cc) { int32_t control = 0; switch (cc) { - case StoreCacheControl::L1UC_L2UC_L3UC: - case StoreCacheControl::L1UC_L2UC_L3WB: - case StoreCacheControl::L1UC_L2WB_L3UC: - case StoreCacheControl::L1UC_L2WB_L3WB: - control = 1; - break; case StoreCacheControl::L1WT_L2UC_L3UC: case StoreCacheControl::L1WT_L2UC_L3WB: case StoreCacheControl::L1WT_L2WB_L3UC: case StoreCacheControl::L1WT_L2WB_L3WB: + control = 1; + break; + case StoreCacheControl::L1WB_L2UC_L3UC: + case StoreCacheControl::L1WB_L2WB_L3UC: + case StoreCacheControl::L1WB_L2UC_L3WB: control = 2; break; case StoreCacheControl::L1S_L2UC_L3UC: @@ -145,10 +140,7 @@ static int32_t getL1CacheControl(StoreCacheControl cc) { case StoreCacheControl::L1S_L2WB_L3WB: control = 3; break; - case StoreCacheControl::L1WB_L2UC_L3UC: - case StoreCacheControl::L1WB_L2WB_L3UC: - case StoreCacheControl::L1WB_L2UC_L3WB: - control = 4; + default: break; } return control; @@ -157,24 +149,18 @@ static int32_t getL1CacheControl(StoreCacheControl cc) { static int32_t getL3CacheControl(LoadCacheControl cc) { int32_t control = 0; switch (cc) { - case LoadCacheControl::L1UC_L2UC_L3UC: - case LoadCacheControl::L1UC_L2C_L3UC: - case LoadCacheControl::L1C_L2UC_L3UC: - case LoadCacheControl::L1C_L2C_L3UC: - case LoadCacheControl::L1S_L2UC_L3UC: - case LoadCacheControl::L1S_L2C_L3UC: - control = 1; - break; case LoadCacheControl::L1UC_L2UC_L3C: case LoadCacheControl::L1UC_L2C_L3C: case LoadCacheControl::L1C_L2UC_L3C: case LoadCacheControl::L1C_L2C_L3C: case LoadCacheControl::L1S_L2UC_L3C: case LoadCacheControl::L1S_L2C_L3C: - control = 2; + control = 1; break; case LoadCacheControl::INVALIDATE_READ: - control = 4; + control = 3; + break; + default: break; } return control; @@ -183,16 +169,6 @@ static int32_t getL3CacheControl(LoadCacheControl cc) { static int32_t getL3CacheControl(StoreCacheControl cc) { int32_t control = 0; switch (cc) { - case StoreCacheControl::L1UC_L2UC_L3UC: - case StoreCacheControl::L1UC_L2WB_L3UC: - case StoreCacheControl::L1WT_L2UC_L3UC: - case StoreCacheControl::L1WT_L2WB_L3UC: - case StoreCacheControl::L1S_L2UC_L3UC: - case StoreCacheControl::L1S_L2WB_L3UC: - case StoreCacheControl::L1WB_L2UC_L3UC: - case StoreCacheControl::L1WB_L2WB_L3UC: - control = 1; - break; case StoreCacheControl::L1UC_L2UC_L3WB: case StoreCacheControl::L1UC_L2WB_L3WB: case StoreCacheControl::L1WT_L2UC_L3WB: @@ -202,6 +178,8 @@ static int32_t getL3CacheControl(StoreCacheControl cc) { case StoreCacheControl::L1WB_L2UC_L3WB: control = 2; break; + default: + break; } return control; } @@ -265,7 +243,7 @@ static std::optional getCacheControlMetadata(ConversionPatternRewriter &rewriter, OpType op) { if (!getCacheControl(op)) return {}; - constexpr int32_t decorationCacheControlArity{4}; + constexpr int32_t decorationCacheControlArity{3}; constexpr int32_t loadCacheControlKey{6442}; constexpr int32_t storeCacheControlKey{6443}; constexpr bool isLoad = std::is_same_v || @@ -275,9 +253,9 @@ getCacheControlMetadata(ConversionPatternRewriter &rewriter, OpType op) { std::is_same_v; const int32_t controlKey{isLoad ? loadCacheControlKey : storeCacheControlKey}; SmallVector decorationsL1{ - controlKey, 0, getL1CacheControl(op), 0}; + controlKey, 0, getL1CacheControl(op)}; SmallVector decorationsL3{ - controlKey, 1, getL3CacheControl(op), 0}; + controlKey, 1, getL3CacheControl(op)}; auto arrayAttrL1 = rewriter.getI32ArrayAttr(decorationsL1); auto arrayAttrL3 = rewriter.getI32ArrayAttr(decorationsL3); diff --git a/mlir/lib/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp index 7e9318ad3c019..ba098aa5fde50 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/XeVM/XeVMToLLVMIRTranslation.cpp @@ -68,7 +68,7 @@ class XeVMDialectLLVMIRTranslationInterface attrs, std::back_inserter(decorations), [&ctx, i32Ty](Attribute attr) -> llvm::Metadata * { auto valuesArray = dyn_cast(attr).getValue(); - std::array metadata; + std::array metadata; llvm::transform( valuesArray, metadata.begin(), [i32Ty](Attribute valueAttr) { return llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( diff --git a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir index 06a0ff5e7484b..e3fb3af67b3e0 100644 --- a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir +++ b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir @@ -32,8 +32,8 @@ llvm.func @blockload2d(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32 // CHECK-LABEL: llvm.func spir_funccc @_Z41intel_sub_group_2d_block_read_16b_8r16x1cPU3AS1viiiDv2_iPt( llvm.func @blockload2d_cache_control(%a: !llvm.ptr<1>, %base_width_a: i32, %base_height_a: i32, %base_pitch_a: i32, %x: i32, %y: i32) -> vector<8xi16> { // CHECK: xevm.DecorationCacheControl = - // CHECK-SAME: 6442 : i32, 0 : i32, 1 : i32, 0 : i32 - // CHECK-SAME: 6442 : i32, 1 : i32, 1 : i32, 0 : i32 + // CHECK-SAME: 6442 : i32, 0 : i32, 0 : i32 + // CHECK-SAME: 6442 : i32, 1 : i32, 0 : i32 %loaded_a = xevm.blockload2d %a, %base_width_a, %base_height_a, %base_pitch_a, %x, %y <{elem_size_in_bits=16 : i32, tile_width=16 : i32, tile_height=8 : i32, v_blocks=1 : i32, transpose=false, pack_register=false, cache_control=#xevm.load_cache_control}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi16> @@ -160,8 +160,8 @@ llvm.func @blockstore2d(%c: !llvm.ptr<1>, %base_width_c: i32, %base_height_c: i3 // CHECK-LABEL: llvm.func spir_funccc @_Z42intel_sub_group_2d_block_write_32b_8r16x1cPU3AS1viiiDv2_iPj( llvm.func @blockstore2d_cache_control(%c: !llvm.ptr<1>, %base_width_c: i32, %base_height_c: i32, %base_pitch_c: i32, %x: i32, %y: i32, %c_result_casted: vector<8xi32>) { // CHECK: xevm.DecorationCacheControl = - // CHECK-SAME: 6443 : i32, 0 : i32, 2 : i32, 0 : i32 - // CHECK-SAME: 6443 : i32, 1 : i32, 2 : i32, 0 : i32 + // CHECK-SAME: 6443 : i32, 0 : i32, 1 : i32 + // CHECK-SAME: 6443 : i32, 1 : i32, 2 : i32 xevm.blockstore2d %c, %base_width_c, %base_height_c, %base_pitch_c, %x, %y, %c_result_casted <{elem_size_in_bits=32 : i32, tile_width=16 : i32, tile_height=8 : i32, cache_control = #xevm.store_cache_control}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32, vector<8xi32>) @@ -242,8 +242,8 @@ llvm.func @prefetch(%ptr: !llvm.ptr<1>) { // CHECK-LABEL: llvm.func @llvm.load llvm.func @llvm.load(%a: !llvm.ptr<1>) -> i32 { // CHECK: xevm.DecorationCacheControl = - // CHECK-SAME: 6442 : i32, 0 : i32, 1 : i32, 0 : i32 - // CHECK-SAME: 6442 : i32, 1 : i32, 1 : i32, 0 : i32 + // CHECK-SAME: 6442 : i32, 0 : i32, 0 : i32 + // CHECK-SAME: 6442 : i32, 1 : i32, 0 : i32 %val = llvm.load %a {cache_control=#xevm.load_cache_control} : !llvm.ptr<1> -> i32 llvm.return %val : i32 } @@ -252,8 +252,8 @@ llvm.func @llvm.load(%a: !llvm.ptr<1>) -> i32 { // CHECK-LABEL: llvm.func @llvm.store llvm.func @llvm.store(%a: !llvm.ptr<1>, %val: i32) { // CHECK: xevm.DecorationCacheControl = - // CHECK-SAME: 6443 : i32, 0 : i32, 2 : i32, 0 : i32 - // CHECK-SAME: 6443 : i32, 1 : i32, 2 : i32, 0 : i32 + // CHECK-SAME: 6443 : i32, 0 : i32, 1 : i32 + // CHECK-SAME: 6443 : i32, 1 : i32, 2 : i32 llvm.store %val, %a {cache_control=#xevm.store_cache_control} : i32, !llvm.ptr<1> llvm.return } @@ -266,8 +266,8 @@ llvm.func @blockload_as1(%ptr: !llvm.ptr<1>) -> vector<8xi16> { // CHECK-SAME: {function_type = !llvm.func (ptr<1>)>, linkage = #llvm.linkage, // CHECK-SAME: no_unwind, sym_name = "_Z30intel_sub_group_block_read_us8PU3AS1t", // CHECK-SAME: visibility_ = 0 : i64, will_return, xevm.DecorationCacheControl = - // CHECK-SAME: [6442 : i32, 0 : i32, 1 : i32, 0 : i32], - // CHECK-SAME: [6442 : i32, 1 : i32, 1 : i32, 0 : i32] + // CHECK-SAME: [6442 : i32, 0 : i32, 0 : i32], + // CHECK-SAME: [6442 : i32, 1 : i32, 0 : i32] %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control}> : (!llvm.ptr<1>) -> vector<8xi16> llvm.return %loaded_a : vector<8xi16> } @@ -280,8 +280,8 @@ llvm.func @blockload_as3(%ptr: !llvm.ptr<3>) -> vector<16xi8> { // CHECK-SAME: {function_type = !llvm.func (ptr<3>)>, linkage = #llvm.linkage, // CHECK-SAME: no_unwind, sym_name = "_Z31intel_sub_group_block_read_uc16PU3AS3h", visibility_ = 0 : i64, // CHECK-SAME: will_return, xevm.DecorationCacheControl = - // CHECK-SAME: [6442 : i32, 0 : i32, 1 : i32, 0 : i32], - // CHECK-SAME: [6442 : i32, 1 : i32, 1 : i32, 0 : i32] + // CHECK-SAME: [6442 : i32, 0 : i32, 0 : i32], + // CHECK-SAME: [6442 : i32, 1 : i32, 0 : i32] %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control}> : (!llvm.ptr<3>) -> vector<16xi8> llvm.return %loaded_a : vector<16xi8> } @@ -294,8 +294,8 @@ llvm.func @blockload_scalar(%ptr: !llvm.ptr<3>) -> i8 { // CHECK-SAME: {function_type = !llvm.func)>, linkage = #llvm.linkage, // CHECK-SAME: no_unwind, sym_name = "_Z29intel_sub_group_block_read_ucPU3AS3h", visibility_ = 0 : i64, // CHECK-SAME: will_return, xevm.DecorationCacheControl = - // CHECK-SAME: [6442 : i32, 0 : i32, 1 : i32, 0 : i32], - // CHECK-SAME: [6442 : i32, 1 : i32, 1 : i32, 0 : i32] + // CHECK-SAME: [6442 : i32, 0 : i32, 0 : i32], + // CHECK-SAME: [6442 : i32, 1 : i32, 0 : i32] %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control}> : (!llvm.ptr<3>) -> i8 llvm.return %loaded_a : i8 } @@ -308,8 +308,8 @@ llvm.func @blockstore_as1(%ptr: !llvm.ptr<1>, %data: vector<8xi32>) { // CHECK-SAME: {function_type = !llvm.func, vector<8xi32>)>, linkage = #llvm.linkage, // CHECK-SAME: no_unwind, sym_name = "_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j", visibility_ = 0 : i64, // CHECK-SAME: will_return, xevm.DecorationCacheControl = - // CHECK-SAME: [6443 : i32, 0 : i32, 2 : i32, 0 : i32], - // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32, 0 : i32] + // CHECK-SAME: [6443 : i32, 0 : i32, 1 : i32], + // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32] xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control}> : (!llvm.ptr<1>, vector<8xi32>) llvm.return } @@ -322,8 +322,8 @@ llvm.func @blockstore_as3(%ptr: !llvm.ptr<3>, %data: vector<2xi64>) { // CHECK-SAME: {function_type = !llvm.func, vector<2xi64>)>, linkage = #llvm.linkage, // CHECK-SAME: no_unwind, sym_name = "_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m", visibility_ = 0 : i64, // CHECK-SAME: will_return, xevm.DecorationCacheControl = - // CHECK-SAME: [6443 : i32, 0 : i32, 2 : i32, 0 : i32], - // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32, 0 : i32] + // CHECK-SAME: [6443 : i32, 0 : i32, 1 : i32], + // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32] xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control}> : (!llvm.ptr<3>, vector<2xi64>) llvm.return } @@ -336,8 +336,8 @@ llvm.func @blockstore_scalar(%ptr: !llvm.ptr<3>, %data: i64) { // CHECK-SAME: {function_type = !llvm.func, i64)>, linkage = #llvm.linkage, // CHECK-SAME: no_unwind, sym_name = "_Z30intel_sub_group_block_write_ulPU3AS3mm", visibility_ = 0 : i64, // CHECK-SAME: will_return, xevm.DecorationCacheControl = - // CHECK-SAME: [6443 : i32, 0 : i32, 2 : i32, 0 : i32], - // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32, 0 : i32] + // CHECK-SAME: [6443 : i32, 0 : i32, 1 : i32], + // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32] xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control}> : (!llvm.ptr<3>, i64) llvm.return } diff --git a/mlir/test/Integration/Dialect/XeVM/GPU/xevm_block_load_store.mlir b/mlir/test/Integration/Dialect/XeVM/GPU/xevm_block_load_store.mlir index cea05b8709d72..a610e28a07e41 100644 --- a/mlir/test/Integration/Dialect/XeVM/GPU/xevm_block_load_store.mlir +++ b/mlir/test/Integration/Dialect/XeVM/GPU/xevm_block_load_store.mlir @@ -32,7 +32,7 @@ module @gemm attributes {gpu.container_module} { // would only load 4 elements into vector<8xi32> %loaded = xevm.blockload2d %src, %base_width, %base_height, %base_pitch, %x, %y <{elem_size_in_bits=32 : i32, tile_width=16 : i32, tile_height=8 : i32, v_blocks=1 : i32, - transpose=false, pack_register=false}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi32> + transpose=false, pack_register=false, cache_control=#xevm.load_cache_control}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32) -> vector<8xi32> %loaded_f32 = vector.bitcast %loaded : vector<8xi32> to vector<8xf32> %c0 = arith.constant 0 : index %thread_x = gpu.thread_id x @@ -42,7 +42,7 @@ module @gemm attributes {gpu.container_module} { %loaded_f32_modified = vector.insert %thread_x_f32, %loaded_f32[%c0] : f32 into vector<8xf32> %loaded_modified = vector.bitcast %loaded_f32_modified : vector<8xf32> to vector<8xi32> xevm.blockstore2d %dst, %base_width, %base_height, %base_pitch, %x, %y, %loaded_modified - <{elem_size_in_bits=32 : i32, tile_width=16 : i32, tile_height=8 : i32}> + <{elem_size_in_bits=32 : i32, tile_width=16 : i32, tile_height=8 : i32, cache_control = #xevm.store_cache_control}> : (!llvm.ptr<1>, i32, i32, i32, i32, i32, vector<8xi32>) gpu.return } diff --git a/mlir/test/Target/LLVMIR/xevm.mlir b/mlir/test/Target/LLVMIR/xevm.mlir index 112d923607060..fcb0d6990ce8f 100644 --- a/mlir/test/Target/LLVMIR/xevm.mlir +++ b/mlir/test/Target/LLVMIR/xevm.mlir @@ -9,15 +9,15 @@ module { llvm.call spir_funccc @_Z8prefetchPU3AS1Kcm(%arg0, %0) {function_type = !llvm.func, i64)>, linkage = #llvm.linkage, no_unwind, sym_name = "_Z8prefetchPU3AS1Kcm", visibility_ = 0 : i64, - xevm.DecorationCacheControl = [[6442 : i32, 0 : i32, 1 : i32, 0 : i32], [6442 : i32, 1 : i32, 1 : i32, 0 : i32]]} + xevm.DecorationCacheControl = [[6442 : i32, 0 : i32, 1 : i32], [6442 : i32, 1 : i32, 1 : i32]]} : (!llvm.ptr<1>, i64) -> () llvm.return } } // CHECK: ![[DECO1]] = !{![[DECO2:.*]], ![[DECO3:.*]]} -// CHECK: ![[DECO2]] = !{i32 6442, i32 0, i32 1, i32 0} -// CHECK: ![[DECO3]] = !{i32 6442, i32 1, i32 1, i32 0} +// CHECK: ![[DECO2]] = !{i32 6442, i32 0, i32 1} +// CHECK: ![[DECO3]] = !{i32 6442, i32 1, i32 1} // ----- module { @@ -26,14 +26,14 @@ module { llvm.func @load(%arg0: !llvm.ptr<1>) -> i32 { // CHECK: load i32, ptr addrspace(1) %[[ARG0]], align 4, // CHECK-SAME: !spirv.DecorationCacheControlINTEL ![[DECO1:.*]] - %0 = llvm.load %arg0 {xevm.DecorationCacheControl = [[6442 : i32, 0 : i32, 1 : i32, 0 : i32], [6442 : i32, 1 : i32, 1 : i32, 0 : i32]]} : !llvm.ptr<1> -> i32 + %0 = llvm.load %arg0 {xevm.DecorationCacheControl = [[6442 : i32, 0 : i32, 1 : i32], [6442 : i32, 1 : i32, 1 : i32]]} : !llvm.ptr<1> -> i32 llvm.return %0 : i32 } } // CHECK: ![[DECO1]] = !{![[DECO2:.*]], ![[DECO3:.*]]} -// CHECK: ![[DECO2]] = !{i32 6442, i32 0, i32 1, i32 0} -// CHECK: ![[DECO3]] = !{i32 6442, i32 1, i32 1, i32 0} +// CHECK: ![[DECO2]] = !{i32 6442, i32 0, i32 1} +// CHECK: ![[DECO3]] = !{i32 6442, i32 1, i32 1} // ----- module { @@ -42,12 +42,12 @@ module { llvm.func @store(%arg0: !llvm.ptr<1>, %arg1: i32) { // CHECK: store i32 %[[ARG1]], ptr addrspace(1) %[[ARG0]], align 4, // CHECK-SAME: !spirv.DecorationCacheControlINTEL ![[DECO1:.*]] - llvm.store %arg1, %arg0 {xevm.DecorationCacheControl = [[6443 : i32, 0 : i32, 2 : i32, 0 : i32], [6443 : i32, 1 : i32, 2 : i32, 0 : i32]]} : i32, !llvm.ptr<1> + llvm.store %arg1, %arg0 {xevm.DecorationCacheControl = [[6443 : i32, 0 : i32, 2 : i32], [6443 : i32, 1 : i32, 2 : i32]]} : i32, !llvm.ptr<1> llvm.return } } // CHECK: ![[DECO1]] = !{![[DECO2:.*]], ![[DECO3:.*]]} -// CHECK: ![[DECO2]] = !{i32 6443, i32 0, i32 2, i32 0} -// CHECK: ![[DECO3]] = !{i32 6443, i32 1, i32 2, i32 0} +// CHECK: ![[DECO2]] = !{i32 6443, i32 0, i32 2} +// CHECK: ![[DECO3]] = !{i32 6443, i32 1, i32 2}