diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index edc6565f44f00..b9a5e7d7f6eac 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -1738,15 +1738,11 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite( auto sourceType = cast(op.getSource().getType()); auto srcElemType = cast(sourceType.getElementType()); unsigned bitWidth = srcElemType.getWidth(); - int32_t scaleSel = - getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte); auto targetType = cast(op.getResult().getType()); auto destElemType = cast(targetType.getElementType()); - IntegerType i32 = rewriter.getI32Type(); - Value castedScale = - LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale()); + IntegerType i32 = rewriter.getI32Type(); Value source = adaptor.getSource(); Type llvmResultType = typeConverter->convertType(op.getResult().getType()); Type packedType = nullptr; @@ -1767,15 +1763,19 @@ LogicalResult ScaledExtPacked816OpLowering::matchAndRewrite( return rewriter.notifyMatchFailure(op, "type conversion failed"); } - Value castedSource = - LLVM::BitcastOp::create(rewriter, loc, packedType, source); - std::optional maybeIntrinsic = scaledExtPacked816ToIntrinsic(srcElemType, destElemType); if (!maybeIntrinsic.has_value()) return op.emitOpError( "no intrinsic matching packed scaled conversion on the given chipset"); + int32_t scaleSel = + getScaleSel(blockSize, bitWidth, firstScaleLane, firstScaleByte); + Value castedScale = + LLVM::BitcastOp::create(rewriter, loc, i32, adaptor.getScale()); + Value castedSource = + LLVM::BitcastOp::create(rewriter, loc, packedType, source); + OperationState loweredOp(loc, *maybeIntrinsic); loweredOp.addTypes({llvmResultType}); loweredOp.addOperands({castedSource, castedScale});