From 3e2b65c76a4df4d272ea96bc1885f67069c8a0fe Mon Sep 17 00:00:00 2001 From: Erick Ochoa Date: Tue, 18 Nov 2025 09:20:24 -0500 Subject: [PATCH 1/3] Sink op creation --- .../AMDGPUToROCDL/AMDGPUToROCDL.cpp | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index edc6565f44f00..d70da4a6f0797 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -1738,15 +1738,17 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite( auto sourceType = cast(op.getSource().getType()); auto srcElemType = cast(sourceType.getElementType()); unsigned bitWidth = srcElemType.getWidth(); - int32_t scaleSel = - getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte); auto targetType = cast(op.getResult().getType()); auto destElemType = cast(targetType.getElementType()); - IntegerType i32 = rewriter.getI32Type(); - Value castedScale = - LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale()); + std::optional maybeIntrinsic = + scaledExtPacked816ToIntrinsic(srcElemType, destElemType); + if (!maybeIntrinsic.has_value()) + return op.emitOpError( + "no intrinsic matching packed scaled conversion on the given chipset"); + + IntegerType i32 = rewriter.getI32Type(); Value source = adaptor.getSource(); Type llvmResultType = typeConverter->convertType(op.getResult().getType()); Type packedType = nullptr; @@ -1767,15 +1769,15 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite( return rewriter.notifyMatchFailure(op, "type conversion failed"); } + int32_t scaleSel = + getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte); + + Value castedScale = + LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale()); + Value castedSource = LLVM::BitcastOp::create(rewriter, loc, packedType, source); - std::optional maybeIntrinsic = - scaledExtPacked816ToIntrinsic(srcElemType, destElemType); - if (!maybeIntrinsic.has_value()) - return op.emitOpError( - "no intrinsic matching packed scaled conversion on the given chipset"); - OperationState loweredOp(loc, *maybeIntrinsic); loweredOp.addTypes({llvmResultType}); loweredOp.addOperands({castedSource, castedScale}); From d02904ed9a9ca8e874d468754e8d35ce5c8b2eca Mon Sep 17 00:00:00 2001 From: Erick Ochoa Date: Tue, 18 Nov 2025 09:24:47 -0500 Subject: [PATCH 2/3] Move op.emitOpError below notifyMatchFailure --- mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index d70da4a6f0797..97487c727f999 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -1742,12 +1742,6 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite( auto targetType = cast(op.getResult().getType()); auto destElemType = cast(targetType.getElementType()); - std::optional maybeIntrinsic = - scaledExtPacked816ToIntrinsic(srcElemType, destElemType); - if (!maybeIntrinsic.has_value()) - return op.emitOpError( - "no intrinsic matching packed scaled conversion on the given chipset"); - IntegerType i32 = rewriter.getI32Type(); Value source = adaptor.getSource(); Type llvmResultType = typeConverter->convertType(op.getResult().getType()); @@ -1769,6 +1763,12 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite( return rewriter.notifyMatchFailure(op, "type conversion failed"); } + std::optional maybeIntrinsic = + scaledExtPacked816ToIntrinsic(srcElemType, destElemType); + if (!maybeIntrinsic.has_value()) + return op.emitOpError( + "no intrinsic matching packed scaled conversion on the given chipset"); + int32_t scaleSel = getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte); From 31de9972036cdad573437374321355010f230c26 Mon Sep 17 00:00:00 2001 From: Erick Ochoa Lopez Date: Tue, 18 Nov 2025 10:03:40 -0500 Subject: [PATCH 3/3] Update mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp Co-authored-by: Jakub Kuderski --- mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index 97487c727f999..b9a5e7d7f6eac 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -1771,10 +1771,8 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite( int32_t scaleSel = getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte); - Value castedScale = LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale()); - Value castedSource = LLVM::BitcastOp::create(rewriter, loc, packedType, source);