diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp index f2b0e71c9397f..59a1ad9dbe189 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp @@ -517,8 +517,7 @@ void LayoutInfoPropagation::visitPrefetchNdOp( auto [bWidth, bHeight, bCount] = blockWHC.value(); SmallVector<int32_t> instData; int instWidth = xegpu::getLargestDivisor( - static_cast<int>(tdescTy.getDimSize(tdescTy.getRank() - 1)), bWidth, - bCount); + static_cast<int>(tdescTy.getDimSize(tdescTy.getRank() - 1)), bWidth); if (instWidth == -1) prefetch.emitWarning( "No suitable instruction multiple found for the given shape."); @@ -759,8 +758,7 @@ void LayoutInfoPropagation::visitStoreNdOp( auto [bWidth, bHeight, bCount] = blockWHC.value(); SmallVector<int32_t> instData; int instWidth = xegpu::getLargestDivisor( - static_cast<int>(dataTy.getDimSize(dataTy.getRank() - 1)), bWidth, - bCount); + static_cast<int>(dataTy.getDimSize(dataTy.getRank() - 1)), bWidth); if (instWidth == -1) store.emitWarning( "No suitable instruction multiple found for the given shape."); diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir index d911baa49acbb..32fb3178a8af2 100644 --- a/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir +++ b/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir @@ -6,6 +6,8 @@ // CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<8x16xf32> // CHECK: %[[TDESC_SRC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout> // CHECK: %[[TDESC_DST:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout> +// CHECK: xegpu.prefetch_nd %[[TDESC_SRC]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<8x32xf32, #xegpu.layout> // 
CHECK: %[[LOADED:.*]] = xegpu.load_nd %0 <{layout = #xegpu.layout}> {layout_result_0 = #xegpu.layout} : // CHECK-SAME: !xegpu.tensor_desc<8x32xf32, #xegpu.layout> -> vector<8x32xf32> // CHECK: xegpu.store_nd %[[LOADED]], %[[TDESC_DST]] <{layout = #xegpu.layout}> : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32, #xegpu.layout> @@ -16,6 +18,7 @@ func.func @load_store_no_array_len(%arg0: memref<8x32xf32>, %arg1: memref<8x32xf %cst = arith.constant dense<0.000000e+00> : vector<8x16xf32> %0 = xegpu.create_nd_tdesc %arg0 : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32> %1 = xegpu.create_nd_tdesc %arg1 : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32> + xegpu.prefetch_nd %0 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>}>: !xegpu.tensor_desc<8x32xf32> %2 = xegpu.load_nd %0 : !xegpu.tensor_desc<8x32xf32> -> vector<8x32xf32> xegpu.store_nd %2, %1 : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32> return