diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 43c7b3df73efe..f8e1ab38e80d4 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -6341,11 +6341,15 @@ def NVVM_AddFOp : NVVM_FloatBinaryOp<"addf", [Commutative]> { let hasVerifier = 1; let extraClassDeclaration = [{ + // Shared lowering for nvvm.addf and nvvm.subf (via fneg+add) static void lowerAddFToLLVMIR( - Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder); + llvm::Value *lhs, llvm::Value *rhs, Value res, + NVVM::FPRoundingMode rnd, NVVM::SaturationMode sat, bool ftz, + LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder); }]; let llvmBuilder = [{ - NVVM::AddFOp::lowerAddFToLLVMIR(*op, moduleTranslation, builder); + NVVM::AddFOp::lowerAddFToLLVMIR($lhs, $rhs, op.getRes(), $rnd, $sat, $ftz, + moduleTranslation, builder); }]; } @@ -6366,6 +6370,14 @@ def NVVM_SubFOp : NVVM_FloatBinaryOp<"subf"> { }]; let hasCanonicalizer = 1; + let hasVerifier = 1; + + let llvmBuilder = [{ + // sub(a, b) = add(a, -b) + llvm::Value *rhs = builder.CreateFNeg($rhs); + NVVM::AddFOp::lowerAddFToLLVMIR($lhs, rhs, op.getRes(), $rnd, $sat, $ftz, + moduleTranslation, builder); + }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp index 354a02f4a2aae..6ccd59cec65bc 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -3059,33 +3059,34 @@ LogicalResult NVVM::TensormapReplaceOp::verify() { return success(); } -LogicalResult NVVM::AddFOp::verify() { - mlir::NVVM::FPRoundingMode rndMode = getRnd(); - mlir::NVVM::SaturationMode satMode = getSat(); - bool isFTZ = getFtz(); +template +static LogicalResult verifyAddSubFOp(OpType op) { + mlir::NVVM::FPRoundingMode rndMode = op.getRnd(); + mlir::NVVM::SaturationMode satMode = op.getSat(); + bool isFTZ = op.getFtz(); - mlir::Type opType = getRes().getType(); + mlir::Type opType = op.getRes().getType(); mlir::Type opBaseType = isa(opType) ? cast(opType).getElementType() : opType; if (opBaseType.isF64() && (satMode != NVVM::SaturationMode::NONE || isFTZ)) - return emitOpError("FTZ and saturation are not supported for additions " - "involving f64 type"); + return op.emitOpError("FTZ and saturation are not supported for " + "additions/subtractions involving f64 type"); if (opBaseType.isF16() && !(rndMode == NVVM::FPRoundingMode::RN || rndMode == NVVM::FPRoundingMode::NONE)) - return emitOpError("only RN rounding mode is supported for f16 and " - "vector<2xf16> additions"); + return op.emitOpError("only RN rounding mode is supported for f16 and " + "vector<2xf16> additions/subtractions"); if (opBaseType.isBF16()) { if (rndMode != NVVM::FPRoundingMode::RN && rndMode != NVVM::FPRoundingMode::NONE) - return emitOpError("only RN rounding mode is supported for bf16 and " - "vector<2xbf16> additions"); + return op.emitOpError("only RN rounding mode is supported for bf16 and " + "vector<2xbf16> additions/subtractions"); if (satMode != NVVM::SaturationMode::NONE || isFTZ) - return emitOpError("FTZ and saturation are not supported for bf16 and " - "vector<2xbf16> additions"); + return op.emitOpError("FTZ and saturation are not supported for bf16 and " + "vector<2xbf16> additions/subtractions"); } // FIXME: This is a temporary check disallowing lowering to add.rn.ftz.f16(x2) @@ -3093,12 +3094,16 @@ LogicalResult NVVM::AddFOp::verify() { // should be removed once the intrinsics for f16 addition (with FTZ only) are // available. if (opBaseType.isF16() && isFTZ && satMode == NVVM::SaturationMode::NONE) - return emitOpError("FTZ with no saturation is not supported for f16 and " - "vector<2xf16> additions"); + return op.emitOpError("FTZ with no saturation is not supported for f16 and " + "vector<2xf16> additions/subtractions"); return success(); } +LogicalResult NVVM::AddFOp::verify() { return verifyAddSubFOp(*this); } + +LogicalResult NVVM::SubFOp::verify() { return verifyAddSubFOp(*this); } + /// Packs the given `field` into the `result`. /// The `result` is 64-bits and each `field` can be 32-bits or narrower. static llvm::Value * diff --git a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp index 02067bb456b25..092643f408ce6 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.cpp @@ -446,21 +446,15 @@ getFenceProxySyncRestrictID(NVVM::MemOrderKind order) { nvvm_fence_proxy_async_generic_release_sync_restrict_space_cta_scope_cluster; } -void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt, +void NVVM::AddFOp::lowerAddFToLLVMIR(llvm::Value *argLHS, llvm::Value *argRHS, + Value res, NVVM::FPRoundingMode rndMode, + NVVM::SaturationMode satMode, bool isFTZ, + LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) { - auto thisOp = cast(op); - NVVM::FPRoundingMode rndMode = thisOp.getRnd(); - NVVM::SaturationMode satMode = thisOp.getSat(); - bool isFTZ = thisOp.getFtz(); + llvm::Type *opTypeLLVM = argLHS->getType(); + bool isVectorOp = opTypeLLVM->isVectorTy(); bool isSat = satMode != NVVM::SaturationMode::NONE; - llvm::Value *argLHS = mt.lookupValue(thisOp.getLhs()); - llvm::Value *argRHS = mt.lookupValue(thisOp.getRhs()); - - mlir::Type opType = thisOp.getLhs().getType(); - llvm::Type *opTypeLLVM = mt.convertType(opType); - bool isVectorAdd = opTypeLLVM->isVectorTy(); - // FIXME: Add intrinsics for add.rn.ftz.f16x2 and add.rn.ftz.f16 here when // they are available. static constexpr llvm::Intrinsic::ID f16IDs[] = { @@ -507,8 +501,8 @@ void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt, return createIntrinsicCall(builder, IID, callArgs); }; - if (isVectorAdd && (opTypeLLVM->getScalarType()->isFloatTy() || - opTypeLLVM->getScalarType()->isDoubleTy())) { + if (isVectorOp && (opTypeLLVM->getScalarType()->isFloatTy() || + opTypeLLVM->getScalarType()->isDoubleTy())) { llvm::Value *result = llvm::PoisonValue::get( llvm::FixedVectorType::get(opTypeLLVM->getScalarType(), 2)); for (int64_t i = 0; i < 2; ++i) { @@ -531,25 +525,25 @@ void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt, if (opTypeLLVM->getScalarType()->isHalfTy()) { llvm::Value *result; if (isSat) { - unsigned index = (isVectorAdd << 1) | isFTZ; + unsigned index = (isVectorOp << 1) | isFTZ; result = addIntrinsic(f16IDs[index]); } else { result = builder.CreateFAdd(argLHS, argRHS); } - mt.mapValue(thisOp.getRes(), result); + mt.mapValue(res, result); return; } // bf16 + bf16 -> bf16 / vector<2xbf16> + vector<2xbf16> -> vector<2xbf16> if (opTypeLLVM->getScalarType()->isBFloatTy()) { - mt.mapValue(thisOp.getRes(), builder.CreateFAdd(argLHS, argRHS)); + mt.mapValue(res, builder.CreateFAdd(argLHS, argRHS)); return; } // f64 + f64 -> f64 / vector<2xf64> + vector<2xf64> -> vector<2xf64> if (opTypeLLVM->getScalarType()->isDoubleTy()) { unsigned index = static_cast(rndMode); - mt.mapValue(thisOp.getRes(), addIntrinsic(f64IDs[index])); + mt.mapValue(res, addIntrinsic(f64IDs[index])); return; } @@ -558,7 +552,7 @@ void NVVM::AddFOp::lowerAddFToLLVMIR(Operation &op, LLVM::ModuleTranslation &mt, if (opTypeLLVM->getScalarType()->isFloatTy()) { unsigned index = ((isFTZ << 1) | isSat) * numRndModes + static_cast(rndMode); - mt.mapValue(thisOp.getRes(), addIntrinsic(f32IDs[index])); + mt.mapValue(res, addIntrinsic(f32IDs[index])); return; } } diff --git a/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir index b0b162357fe2f..23ba79ee3d8af 100644 --- a/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir +++ b/mlir/test/Target/LLVMIR/nvvm/addf/addf_invalid.mlir @@ -11,7 +11,7 @@ llvm.func @addf_invalid_sat_mode(%a : f16, %b : f16) -> f16 { // ----- llvm.func @addf_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 { - // expected-error@+1 {{FTZ and saturation are not supported for additions involving f64 type}} + // expected-error@+1 {{FTZ and saturation are not supported for additions/subtractions involving f64 type}} %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode, ftz=true} : f64 llvm.return %f1 : f64 } @@ -19,7 +19,7 @@ llvm.func @addf_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 { // ----- llvm.func @addf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 { - // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}} + // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions/subtractions}} %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : f16 llvm.return %f1 : f16 } @@ -27,7 +27,7 @@ llvm.func @addf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 { // ----- llvm.func @addf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> { - // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions}} + // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions/subtractions}} %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> llvm.return %f1 : vector<2xf16> } @@ -35,7 +35,7 @@ llvm.func @addf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) - // ----- llvm.func @addf_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 { - // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions}} + // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions/subtractions}} %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : bf16 llvm.return %f1 : bf16 } @@ -43,7 +43,7 @@ llvm.func @addf_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 { // ----- llvm.func @addf_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> { - // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions}} + // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions/subtractions}} %f1 = nvvm.addf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> llvm.return %f1 : vector<2xbf16> } @@ -51,7 +51,7 @@ llvm.func @addf_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16> // ----- llvm.func @addf_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 { - // expected-error@+1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16> additions}} + // expected-error@+1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16> additions/subtractions}} %f1 = nvvm.addf %a, %b {sat = #nvvm.sat_mode, ftz=true} : bf16 llvm.return %f1 : bf16 } @@ -61,7 +61,7 @@ llvm.func @addf_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 { // FIXME: Remove this test once intrinsics for f16 addition (with FTZ only) are // available. llvm.func @addf_invalid_f16_ftz_no_sat(%a : f16, %b : f16) -> f16 { - // expected-error@+1 {{FTZ with no saturation is not supported for f16 and vector<2xf16> additions}} + // expected-error@+1 {{FTZ with no saturation is not supported for f16 and vector<2xf16> additions/subtractions}} %f1 = nvvm.addf %a, %b {ftz=true} : f16 llvm.return %f1 : f16 } diff --git a/mlir/test/Target/LLVMIR/nvvm/subf/subf.mlir b/mlir/test/Target/LLVMIR/nvvm/subf/subf.mlir new file mode 100644 index 0000000000000..e21bcfb42023d --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/subf/subf.mlir @@ -0,0 +1,117 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// f16 - f16 -> f16 +llvm.func @fsub_f16_f16(%a : f16, %b : f16) -> f16 { + // CHECK-LABEL: define half @fsub_f16_f16(half %0, half %1) { + // CHECK-NEXT: %3 = fneg half %1 + // CHECK-NEXT: %4 = fadd half %0, %3 + // CHECK-NEXT: %5 = fneg half %4 + // CHECK-NEXT: %6 = fadd half %4, %5 + // CHECK-NEXT: %7 = fneg half %6 + // CHECK-NEXT: %8 = call half @llvm.nvvm.add.rn.sat.f16(half %6, half %7) + // CHECK-NEXT: %9 = fneg half %8 + // CHECK-NEXT: %10 = call half @llvm.nvvm.add.rn.ftz.sat.f16(half %8, half %9) + // CHECK-NEXT: ret half %10 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b : f16 + %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : f16 + %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f16 + %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f16 + llvm.return %f4 : f16 +} + +// bf16 - bf16 -> bf16 +llvm.func @fsub_bf16_bf16(%a : bf16, %b : bf16) -> bf16 { + // CHECK-LABEL: define bfloat @fsub_bf16_bf16(bfloat %0, bfloat %1) { + // CHECK-NEXT: %3 = fneg bfloat %1 + // CHECK-NEXT: %4 = fadd bfloat %0, %3 + // CHECK-NEXT: %5 = fneg bfloat %4 + // CHECK-NEXT: %6 = fadd bfloat %4, %5 + // CHECK-NEXT: ret bfloat %6 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b : bf16 + %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : bf16 + llvm.return %f2 : bf16 +} + +// f32 - f32 -> f32 +llvm.func @fsub_f32_f32(%a : f32, %b : f32) -> f32 { + // CHECK-LABEL: define float @fsub_f32_f32(float %0, float %1) { + // CHECK-NEXT: %3 = fneg float %1 + // CHECK-NEXT: %4 = call float @llvm.nvvm.add.rn.f(float %0, float %3) + // CHECK-NEXT: %5 = fneg float %4 + // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %4, float %5) + // CHECK-NEXT: %7 = fneg float %6 + // CHECK-NEXT: %8 = call float @llvm.nvvm.add.rn.sat.f(float %6, float %7) + // CHECK-NEXT: %9 = fneg float %8 + // CHECK-NEXT: %10 = call float @llvm.nvvm.add.rn.ftz.f(float %8, float %9) + // CHECK-NEXT: %11 = fneg float %10 + // CHECK-NEXT: %12 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %10, float %11) + // CHECK-NEXT: %13 = fneg float %12 + // CHECK-NEXT: %14 = call float @llvm.nvvm.add.rm.f(float %12, float %13) + // CHECK-NEXT: %15 = fneg float %14 + // CHECK-NEXT: %16 = call float @llvm.nvvm.add.rm.sat.f(float %14, float %15) + // CHECK-NEXT: %17 = fneg float %16 + // CHECK-NEXT: %18 = call float @llvm.nvvm.add.rm.ftz.f(float %16, float %17) + // CHECK-NEXT: %19 = fneg float %18 + // CHECK-NEXT: %20 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %18, float %19) + // CHECK-NEXT: %21 = fneg float %20 + // CHECK-NEXT: %22 = call float @llvm.nvvm.add.rp.f(float %20, float %21) + // CHECK-NEXT: %23 = fneg float %22 + // CHECK-NEXT: %24 = call float @llvm.nvvm.add.rp.sat.f(float %22, float %23) + // CHECK-NEXT: %25 = fneg float %24 + // CHECK-NEXT: %26 = call float @llvm.nvvm.add.rp.ftz.f(float %24, float %25) + // CHECK-NEXT: %27 = fneg float %26 + // CHECK-NEXT: %28 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %26, float %27) + // CHECK-NEXT: %29 = fneg float %28 + // CHECK-NEXT: %30 = call float @llvm.nvvm.add.rz.f(float %28, float %29) + // CHECK-NEXT: %31 = fneg float %30 + // CHECK-NEXT: %32 = call float @llvm.nvvm.add.rz.sat.f(float %30, float %31) + // CHECK-NEXT: %33 = fneg float %32 + // CHECK-NEXT: %34 = call float @llvm.nvvm.add.rz.ftz.f(float %32, float %33) + // CHECK-NEXT: %35 = fneg float %34 + // CHECK-NEXT: %36 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %34, float %35) + // CHECK-NEXT: ret float %36 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b : f32 + %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : f32 + %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 + %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, ftz=true} : f32 + %f5 = nvvm.subf %f4, %f4 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f32 + %f6 = nvvm.subf %f5, %f5 {rnd = #nvvm.fp_rnd_mode} : f32 + %f7 = nvvm.subf %f6, %f6 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 + %f8 = nvvm.subf %f7, %f7 {rnd = #nvvm.fp_rnd_mode, ftz=true} : f32 + %f9 = nvvm.subf %f8, %f8 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f32 + %f10 = nvvm.subf %f9, %f9 {rnd = #nvvm.fp_rnd_mode} : f32 + %f11 = nvvm.subf %f10, %f10 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 + %f12 = nvvm.subf %f11, %f11 {rnd = #nvvm.fp_rnd_mode, ftz=true} : f32 + %f13 = nvvm.subf %f12, %f12 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f32 + %f14 = nvvm.subf %f13, %f13 {rnd = #nvvm.fp_rnd_mode} : f32 + %f15 = nvvm.subf %f14, %f14 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : f32 + %f16 = nvvm.subf %f15, %f15 {rnd = #nvvm.fp_rnd_mode, ftz=true} : f32 + %f17 = nvvm.subf %f16, %f16 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : f32 + llvm.return %f17 : f32 +} + +// f64 - f64 -> f64 +llvm.func @fsub_f64_f64(%a : f64, %b : f64) -> f64 { + // CHECK-LABEL: define double @fsub_f64_f64(double %0, double %1) { + // CHECK-NEXT: %3 = fneg double %1 + // CHECK-NEXT: %4 = call double @llvm.nvvm.add.rn.d(double %0, double %3) + // CHECK-NEXT: %5 = fneg double %4 + // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %4, double %5) + // CHECK-NEXT: %7 = fneg double %6 + // CHECK-NEXT: %8 = call double @llvm.nvvm.add.rm.d(double %6, double %7) + // CHECK-NEXT: %9 = fneg double %8 + // CHECK-NEXT: %10 = call double @llvm.nvvm.add.rp.d(double %8, double %9) + // CHECK-NEXT: %11 = fneg double %10 + // CHECK-NEXT: %12 = call double @llvm.nvvm.add.rz.d(double %10, double %11) + // CHECK-NEXT: ret double %12 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b : f64 + %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : f64 + %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode} : f64 + %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode} : f64 + %f5 = nvvm.subf %f4, %f4 {rnd = #nvvm.fp_rnd_mode} : f64 + llvm.return %f5 : f64 +} diff --git a/mlir/test/Target/LLVMIR/nvvm/subf/subf_invalid.mlir b/mlir/test/Target/LLVMIR/nvvm/subf/subf_invalid.mlir new file mode 100644 index 0000000000000..bf4bbd19cb396 --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/subf/subf_invalid.mlir @@ -0,0 +1,67 @@ +// RUN: mlir-translate --mlir-to-llvmir --split-input-file --verify-diagnostics %s + +// ----- + +llvm.func @subf_invalid_sat_mode(%a : f16, %b : f16) -> f16 { + // expected-error@+1 {{ attribute 'sat' failed to satisfy constraint: Describes the saturation mode whose value is one of {none, sat}}} + %f1 = nvvm.subf %a, %b {sat = #nvvm.sat_mode} : f16 + llvm.return %f1 : f16 +} + +// ----- + +llvm.func @subf_invalid_f64_sat_ftz(%a : f64, %b : f64) -> f64 { + // expected-error@+1 {{FTZ and saturation are not supported for additions/subtractions involving f64 type}} + %f1 = nvvm.subf %a, %b {sat = #nvvm.sat_mode, ftz=true} : f64 + llvm.return %f1 : f64 +} + +// ----- + +llvm.func @subf_invalid_f16_rnd_mode(%a : f16, %b : f16) -> f16 { + // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions/subtractions}} + %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : f16 + llvm.return %f1 : f16 +} + +// ----- + +llvm.func @subf_invalid_v2f16_rnd_mode(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> { + // expected-error@+1 {{only RN rounding mode is supported for f16 and vector<2xf16> additions/subtractions}} + %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + llvm.return %f1 : vector<2xf16> +} + +// ----- + +llvm.func @subf_invalid_bf16_rnd_mode(%a : bf16, %b : bf16) -> bf16 { + // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions/subtractions}} + %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : bf16 + llvm.return %f1 : bf16 +} + +// ----- + +llvm.func @subf_invalid_v2bf16_rnd_mode(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> { + // expected-error@+1 {{only RN rounding mode is supported for bf16 and vector<2xbf16> additions/subtractions}} + %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + llvm.return %f1 : vector<2xbf16> +} + +// ----- + +llvm.func @subf_invalid_bf16_sat_ftz(%a : bf16, %b : bf16) -> bf16 { + // expected-error@+1 {{FTZ and saturation are not supported for bf16 and vector<2xbf16> additions/subtractions}} + %f1 = nvvm.subf %a, %b {sat = #nvvm.sat_mode, ftz=true} : bf16 + llvm.return %f1 : bf16 +} + +// ----- + +// FIXME: Remove this test once intrinsics for f16 addition (with FTZ only) are +// available. +llvm.func @subf_invalid_f16_ftz_no_sat(%a : f16, %b : f16) -> f16 { + // expected-error@+1 {{FTZ with no saturation is not supported for f16 and vector<2xf16> additions/subtractions}} + %f1 = nvvm.subf %a, %b {ftz=true} : f16 + llvm.return %f1 : f16 +} diff --git a/mlir/test/Target/LLVMIR/nvvm/subf/subf_vector.mlir b/mlir/test/Target/LLVMIR/nvvm/subf/subf_vector.mlir new file mode 100644 index 0000000000000..3dca3fc41fa34 --- /dev/null +++ b/mlir/test/Target/LLVMIR/nvvm/subf/subf_vector.mlir @@ -0,0 +1,313 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// vector<2xf16> - vector<2xf16> -> vector<2xf16> +llvm.func @subf_vector_f16_f16(%a : vector<2xf16>, %b : vector<2xf16>) -> vector<2xf16> { + // CHECK-LABEL: define <2 x half> @subf_vector_f16_f16(<2 x half> %0, <2 x half> %1) { + // CHECK-NEXT: %3 = fneg <2 x half> %1 + // CHECK-NEXT: %4 = fadd <2 x half> %0, %3 + // CHECK-NEXT: %5 = fneg <2 x half> %4 + // CHECK-NEXT: %6 = fadd <2 x half> %4, %5 + // CHECK-NEXT: %7 = fneg <2 x half> %6 + // CHECK-NEXT: %8 = call <2 x half> @llvm.nvvm.add.rn.sat.v2f16(<2 x half> %6, <2 x half> %7) + // CHECK-NEXT: %9 = fneg <2 x half> %8 + // CHECK-NEXT: %10 = call <2 x half> @llvm.nvvm.add.rn.ftz.sat.v2f16(<2 x half> %8, <2 x half> %9) + // CHECK-NEXT: ret <2 x half> %4 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b : vector<2xf16> + %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xf16> + %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf16> + %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf16> + llvm.return %f1 : vector<2xf16> +} + +// vector<2xbf16> - vector<2xbf16> -> vector<2xbf16> +llvm.func @subf_vector_bf16_bf16(%a : vector<2xbf16>, %b : vector<2xbf16>) -> vector<2xbf16> { + // CHECK-LABEL: define <2 x bfloat> @subf_vector_bf16_bf16(<2 x bfloat> %0, <2 x bfloat> %1) { + // CHECK-NEXT: %3 = fneg <2 x bfloat> %1 + // CHECK-NEXT: %4 = fadd <2 x bfloat> %0, %3 + // CHECK-NEXT: %5 = fneg <2 x bfloat> %4 + // CHECK-NEXT: %6 = fadd <2 x bfloat> %4, %5 + // CHECK-NEXT: ret <2 x bfloat> %6 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b : vector<2xbf16> + %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xbf16> + llvm.return %f2 : vector<2xbf16> +} + +// vector<2xf32> - vector<2xf32> -> vector<2xf32> +llvm.func @subf_vector_f32_f32_rn(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> { + // CHECK-LABEL: define <2 x float> @subf_vector_f32_f32_rn(<2 x float> %0, <2 x float> %1) { + // CHECK-NEXT: %3 = fneg <2 x float> %1 + // CHECK-NEXT: %4 = extractelement <2 x float> %0, i32 0 + // CHECK-NEXT: %5 = extractelement <2 x float> %3, i32 0 + // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rn.f(float %4, float %5) + // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0 + // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1 + // CHECK-NEXT: %9 = extractelement <2 x float> %3, i32 1 + // CHECK-NEXT: %10 = call float @llvm.nvvm.add.rn.f(float %8, float %9) + // CHECK-NEXT: %11 = insertelement <2 x float> %7, float %10, i32 1 + // CHECK-NEXT: %12 = fneg <2 x float> %11 + // CHECK-NEXT: %13 = extractelement <2 x float> %11, i32 0 + // CHECK-NEXT: %14 = extractelement <2 x float> %12, i32 0 + // CHECK-NEXT: %15 = call float @llvm.nvvm.add.rn.f(float %13, float %14) + // CHECK-NEXT: %16 = insertelement <2 x float> poison, float %15, i32 0 + // CHECK-NEXT: %17 = extractelement <2 x float> %11, i32 1 + // CHECK-NEXT: %18 = extractelement <2 x float> %12, i32 1 + // CHECK-NEXT: %19 = call float @llvm.nvvm.add.rn.f(float %17, float %18) + // CHECK-NEXT: %20 = insertelement <2 x float> %16, float %19, i32 1 + // CHECK-NEXT: %21 = fneg <2 x float> %20 + // CHECK-NEXT: %22 = extractelement <2 x float> %20, i32 0 + // CHECK-NEXT: %23 = extractelement <2 x float> %21, i32 0 + // CHECK-NEXT: %24 = call float @llvm.nvvm.add.rn.sat.f(float %22, float %23) + // CHECK-NEXT: %25 = insertelement <2 x float> poison, float %24, i32 0 + // CHECK-NEXT: %26 = extractelement <2 x float> %20, i32 1 + // CHECK-NEXT: %27 = extractelement <2 x float> %21, i32 1 + // CHECK-NEXT: %28 = call float @llvm.nvvm.add.rn.sat.f(float %26, float %27) + // CHECK-NEXT: %29 = insertelement <2 x float> %25, float %28, i32 1 + // CHECK-NEXT: %30 = fneg <2 x float> %29 + // CHECK-NEXT: %31 = extractelement <2 x float> %29, i32 0 + // CHECK-NEXT: %32 = extractelement <2 x float> %30, i32 0 + // CHECK-NEXT: %33 = call float @llvm.nvvm.add.rn.ftz.f(float %31, float %32) + // CHECK-NEXT: %34 = insertelement <2 x float> poison, float %33, i32 0 + // CHECK-NEXT: %35 = extractelement <2 x float> %29, i32 1 + // CHECK-NEXT: %36 = extractelement <2 x float> %30, i32 1 + // CHECK-NEXT: %37 = call float @llvm.nvvm.add.rn.ftz.f(float %35, float %36) + // CHECK-NEXT: %38 = insertelement <2 x float> %34, float %37, i32 1 + // CHECK-NEXT: %39 = fneg <2 x float> %38 + // CHECK-NEXT: %40 = extractelement <2 x float> %38, i32 0 + // CHECK-NEXT: %41 = extractelement <2 x float> %39, i32 0 + // CHECK-NEXT: %42 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %40, float %41) + // CHECK-NEXT: %43 = insertelement <2 x float> poison, float %42, i32 0 + // CHECK-NEXT: %44 = extractelement <2 x float> %38, i32 1 + // CHECK-NEXT: %45 = extractelement <2 x float> %39, i32 1 + // CHECK-NEXT: %46 = call float @llvm.nvvm.add.rn.ftz.sat.f(float %44, float %45) + // CHECK-NEXT: %47 = insertelement <2 x float> %43, float %46, i32 1 + // CHECK-NEXT: ret <2 x float> %38 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b : vector<2xf32> + %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> + %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> + %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, ftz=true} : vector<2xf32> + %f5 = nvvm.subf %f4, %f4 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf32> + llvm.return %f4 : vector<2xf32> +} + +llvm.func @subf_vector_f32_f32_rm(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> { + // CHECK-LABEL: define <2 x float> @subf_vector_f32_f32_rm(<2 x float> %0, <2 x float> %1) { + // CHECK-NEXT: %3 = fneg <2 x float> %1 + // CHECK-NEXT: %4 = extractelement <2 x float> %0, i32 0 + // CHECK-NEXT: %5 = extractelement <2 x float> %3, i32 0 + // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rm.f(float %4, float %5) + // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0 + // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1 + // CHECK-NEXT: %9 = extractelement <2 x float> %3, i32 1 + // CHECK-NEXT: %10 = call float @llvm.nvvm.add.rm.f(float %8, float %9) + // CHECK-NEXT: %11 = insertelement <2 x float> %7, float %10, i32 1 + // CHECK-NEXT: %12 = fneg <2 x float> %11 + // CHECK-NEXT: %13 = extractelement <2 x float> %11, i32 0 + // CHECK-NEXT: %14 = extractelement <2 x float> %12, i32 0 + // CHECK-NEXT: %15 = call float @llvm.nvvm.add.rm.sat.f(float %13, float %14) + // CHECK-NEXT: %16 = insertelement <2 x float> poison, float %15, i32 0 + // CHECK-NEXT: %17 = extractelement <2 x float> %11, i32 1 + // CHECK-NEXT: %18 = extractelement <2 x float> %12, i32 1 + // CHECK-NEXT: %19 = call float @llvm.nvvm.add.rm.sat.f(float %17, float %18) + // CHECK-NEXT: %20 = insertelement <2 x float> %16, float %19, i32 1 + // CHECK-NEXT: %21 = fneg <2 x float> %20 + // CHECK-NEXT: %22 = extractelement <2 x float> %20, i32 0 + // CHECK-NEXT: %23 = extractelement <2 x float> %21, i32 0 + // CHECK-NEXT: %24 = call float @llvm.nvvm.add.rm.ftz.f(float %22, float %23) + // CHECK-NEXT: %25 = insertelement <2 x float> poison, float %24, i32 0 + // CHECK-NEXT: %26 = extractelement <2 x float> %20, i32 1 + // CHECK-NEXT: %27 = extractelement <2 x float> %21, i32 1 + // CHECK-NEXT: %28 = call float @llvm.nvvm.add.rm.ftz.f(float %26, float %27) + // CHECK-NEXT: %29 = insertelement <2 x float> %25, float %28, i32 1 + // CHECK-NEXT: %30 = fneg <2 x float> %29 + // CHECK-NEXT: %31 = extractelement <2 x float> %29, i32 0 + // CHECK-NEXT: %32 = extractelement <2 x float> %30, i32 0 + // CHECK-NEXT: %33 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %31, float %32) + // CHECK-NEXT: %34 = insertelement <2 x float> poison, float %33, i32 0 + // CHECK-NEXT: %35 = extractelement <2 x float> %29, i32 1 + // CHECK-NEXT: %36 = extractelement <2 x float> %30, i32 1 + // CHECK-NEXT: %37 = call float @llvm.nvvm.add.rm.ftz.sat.f(float %35, float %36) + // CHECK-NEXT: %38 = insertelement <2 x float> %34, float %37, i32 1 + // CHECK-NEXT: ret <2 x float> %38 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> + %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> + %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, ftz=true} : vector<2xf32> + %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf32> + llvm.return %f4 : vector<2xf32> +} + +llvm.func @subf_vector_f32_f32_rp(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> { + // CHECK-LABEL: define <2 x float> @subf_vector_f32_f32_rp(<2 x float> %0, <2 x float> %1) { + // CHECK-NEXT: %3 = fneg <2 x float> %1 + // CHECK-NEXT: %4 = extractelement <2 x float> %0, i32 0 + // CHECK-NEXT: %5 = extractelement <2 x float> %3, i32 0 + // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rp.f(float %4, float %5) + // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0 + // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1 + // CHECK-NEXT: %9 = extractelement <2 x float> %3, i32 1 + // CHECK-NEXT: %10 = call float @llvm.nvvm.add.rp.f(float %8, float %9) + // CHECK-NEXT: %11 = insertelement <2 x float> %7, float %10, i32 1 + // CHECK-NEXT: %12 = fneg <2 x float> %11 + // CHECK-NEXT: %13 = extractelement <2 x float> %11, i32 0 + // CHECK-NEXT: %14 = extractelement <2 x float> %12, i32 0 + // CHECK-NEXT: %15 = call float @llvm.nvvm.add.rp.sat.f(float %13, float %14) + // CHECK-NEXT: %16 = insertelement <2 x float> poison, float %15, i32 0 + // CHECK-NEXT: %17 = extractelement <2 x float> %11, i32 1 + // CHECK-NEXT: %18 = extractelement <2 x float> %12, i32 1 + // CHECK-NEXT: %19 = call float @llvm.nvvm.add.rp.sat.f(float %17, float %18) + // CHECK-NEXT: %20 = insertelement <2 x float> %16, float %19, i32 1 + // CHECK-NEXT: %21 = fneg <2 x float> %20 + // CHECK-NEXT: %22 = extractelement <2 x float> %20, i32 0 + // CHECK-NEXT: %23 = extractelement <2 x float> %21, i32 0 + // CHECK-NEXT: %24 = call float @llvm.nvvm.add.rp.ftz.f(float %22, float %23) + // CHECK-NEXT: %25 = insertelement <2 x float> poison, float %24, i32 0 + // CHECK-NEXT: %26 = extractelement <2 x float> %20, i32 1 + // CHECK-NEXT: %27 = extractelement <2 x float> %21, i32 1 + // CHECK-NEXT: %28 = call float @llvm.nvvm.add.rp.ftz.f(float %26, float %27) + // CHECK-NEXT: %29 = insertelement <2 x float> %25, float %28, i32 1 + // CHECK-NEXT: %30 = fneg <2 x float> %29 + // CHECK-NEXT: %31 = extractelement <2 x float> %29, i32 0 + // CHECK-NEXT: %32 = extractelement <2 x float> %30, i32 0 + // CHECK-NEXT: %33 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %31, float %32) + // CHECK-NEXT: %34 = insertelement <2 x float> poison, float %33, i32 0 + // CHECK-NEXT: %35 = extractelement <2 x float> %29, i32 1 + // CHECK-NEXT: %36 = extractelement <2 x float> %30, i32 1 + // CHECK-NEXT: %37 = call float @llvm.nvvm.add.rp.ftz.sat.f(float %35, float %36) + // CHECK-NEXT: %38 = insertelement <2 x float> %34, float %37, i32 1 + // CHECK-NEXT: ret <2 x float> %38 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> + %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> + %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, ftz=true} : vector<2xf32> + %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf32> + llvm.return %f4 : vector<2xf32> +} + +llvm.func @subf_vector_f32_f32_rz(%a : vector<2xf32>, %b : vector<2xf32>) -> vector<2xf32> { + // CHECK-LABEL: define <2 x float> @subf_vector_f32_f32_rz(<2 x float> %0, <2 x float> %1) { + // CHECK-NEXT: %3 = fneg <2 x float> %1 + // CHECK-NEXT: %4 = extractelement <2 x float> %0, i32 0 + // CHECK-NEXT: %5 = extractelement <2 x float> %3, i32 0 + // CHECK-NEXT: %6 = call float @llvm.nvvm.add.rz.f(float %4, float %5) + // CHECK-NEXT: %7 = insertelement <2 x float> poison, float %6, i32 0 + // CHECK-NEXT: %8 = extractelement <2 x float> %0, i32 1 + // CHECK-NEXT: %9 = extractelement <2 x float> %3, i32 1 + // CHECK-NEXT: %10 = call float @llvm.nvvm.add.rz.f(float %8, float %9) + // CHECK-NEXT: %11 = insertelement <2 x float> %7, float %10, i32 1 + // CHECK-NEXT: %12 = fneg <2 x float> %11 + // CHECK-NEXT: %13 = extractelement <2 x float> %11, i32 0 + // CHECK-NEXT: %14 = extractelement <2 x float> %12, i32 0 + // CHECK-NEXT: %15 = call float @llvm.nvvm.add.rz.sat.f(float %13, float %14) + // CHECK-NEXT: %16 = insertelement <2 x float> poison, float %15, i32 0 + // CHECK-NEXT: %17 = extractelement <2 x float> %11, i32 1 + // CHECK-NEXT: %18 = extractelement <2 x float> %12, i32 1 + // CHECK-NEXT: %19 = call float @llvm.nvvm.add.rz.sat.f(float %17, float %18) + // CHECK-NEXT: %20 = insertelement <2 x float> %16, float %19, i32 1 + // CHECK-NEXT: %21 = fneg <2 x float> %20 + // CHECK-NEXT: %22 = extractelement <2 x float> %20, i32 0 + // CHECK-NEXT: %23 = extractelement <2 x float> %21, i32 0 + // CHECK-NEXT: %24 = call float @llvm.nvvm.add.rz.ftz.f(float %22, float %23) + // CHECK-NEXT: %25 = insertelement <2 x float> poison, float %24, i32 0 + // CHECK-NEXT: %26 = extractelement <2 x float> %20, i32 1 + // CHECK-NEXT: %27 = extractelement <2 x float> %21, i32 1 + // CHECK-NEXT: %28 = call float @llvm.nvvm.add.rz.ftz.f(float %26, float %27) + // CHECK-NEXT: %29 = insertelement <2 x float> %25, float %28, i32 1 + // CHECK-NEXT: %30 = fneg <2 x float> %29 + // CHECK-NEXT: %31 = extractelement <2 x float> %29, i32 0 + // CHECK-NEXT: %32 = extractelement <2 x float> %30, i32 0 + // CHECK-NEXT: %33 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %31, float %32) + // CHECK-NEXT: %34 = insertelement <2 x float> poison, float %33, i32 0 + // CHECK-NEXT: %35 = extractelement <2 x float> %29, i32 1 + // CHECK-NEXT: %36 = extractelement <2 x float> %30, i32 1 + // CHECK-NEXT: %37 = call float @llvm.nvvm.add.rz.ftz.sat.f(float %35, float %36) + // CHECK-NEXT: %38 = insertelement <2 x float> %34, float %37, i32 1 + // CHECK-NEXT: ret <2 x float> %38 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf32> + %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode} : vector<2xf32> + %f3 = nvvm.subf %f2, %f2 {rnd = #nvvm.fp_rnd_mode, ftz=true} : vector<2xf32> + %f4 = nvvm.subf %f3, %f3 {rnd = #nvvm.fp_rnd_mode, sat = #nvvm.sat_mode, ftz=true} : vector<2xf32> + llvm.return %f4 : vector<2xf32> +} + +// vector<2xf64> - vector<2xf64> -> vector<2xf64> +llvm.func @subf_vector_f64_f64_rn(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> { + // CHECK-LABEL: define <2 x double> @subf_vector_f64_f64_rn(<2 x double> %0, <2 x double> %1) { + // CHECK-NEXT: %3 = fneg <2 x double> %1 + // CHECK-NEXT: %4 = extractelement <2 x double> %0, i32 0 + // CHECK-NEXT: %5 = extractelement <2 x double> %3, i32 0 + // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rn.d(double %4, double %5) + // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0 + // CHECK-NEXT: %8 = extractelement <2 x double> %0, i32 1 + // CHECK-NEXT: %9 = extractelement <2 x double> %3, i32 1 + // CHECK-NEXT: %10 = call double @llvm.nvvm.add.rn.d(double %8, double %9) + // CHECK-NEXT: %11 = insertelement <2 x double> %7, double %10, i32 1 + // CHECK-NEXT: %12 = fneg <2 x double> %11 + // CHECK-NEXT: %13 = extractelement <2 x double> %11, i32 0 + // CHECK-NEXT: %14 = extractelement <2 x double> %12, i32 0 + // CHECK-NEXT: %15 = call double @llvm.nvvm.add.rn.d(double %13, double %14) + // CHECK-NEXT: %16 = insertelement <2 x double> poison, double %15, i32 0 + // CHECK-NEXT: %17 = extractelement <2 x double> %11, i32 1 + // CHECK-NEXT: %18 = extractelement <2 x double> %12, i32 1 + // CHECK-NEXT: %19 = call double @llvm.nvvm.add.rn.d(double %17, double %18) + // CHECK-NEXT: %20 = insertelement <2 x double> %16, double %19, i32 1 + // CHECK-NEXT: ret <2 x double> %20 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b : vector<2xf64> + %f2 = nvvm.subf %f1, %f1 {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + llvm.return %f2 : vector<2xf64> +} + +llvm.func @subf_vector_f64_f64_rm(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> { + // CHECK-LABEL: define <2 x double> @subf_vector_f64_f64_rm(<2 x double> %0, <2 x double> %1) { + // CHECK-NEXT: %3 = fneg <2 x double> %1 + // CHECK-NEXT: %4 = extractelement <2 x double> %0, i32 0 + // CHECK-NEXT: %5 = extractelement <2 x double> %3, i32 0 + // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rm.d(double %4, double %5) + // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0 + // CHECK-NEXT: %8 = extractelement <2 x double> %0, i32 1 + // CHECK-NEXT: %9 = extractelement <2 x double> %3, i32 1 + // CHECK-NEXT: %10 = call double @llvm.nvvm.add.rm.d(double %8, double %9) + // CHECK-NEXT: %11 = insertelement <2 x double> %7, double %10, i32 1 + // CHECK-NEXT: ret <2 x double> %11 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + llvm.return %f1 : vector<2xf64> +} + +llvm.func @subf_vector_f64_f64_rp(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> { + // CHECK-LABEL: define <2 x double> @subf_vector_f64_f64_rp(<2 x double> %0, <2 x double> %1) { + // CHECK-NEXT: %3 = fneg <2 x double> %1 + // CHECK-NEXT: %4 = extractelement <2 x double> %0, i32 0 + // CHECK-NEXT: %5 = extractelement <2 x double> %3, i32 0 + // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rp.d(double %4, double %5) + // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0 + // CHECK-NEXT: %8 = extractelement <2 x double> %0, i32 1 + // CHECK-NEXT: %9 = extractelement <2 x double> %3, i32 1 + // CHECK-NEXT: %10 = call double @llvm.nvvm.add.rp.d(double %8, double %9) + // CHECK-NEXT: %11 = insertelement <2 x double> %7, double %10, i32 1 + // CHECK-NEXT: ret <2 x double> %11 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + llvm.return %f1 : vector<2xf64> +} + +llvm.func @subf_vector_f64_f64_rz(%a : vector<2xf64>, %b : vector<2xf64>) -> vector<2xf64> { + // CHECK-LABEL: define <2 x double> @subf_vector_f64_f64_rz(<2 x double> %0, <2 x double> %1) { + // CHECK-NEXT: %3 = fneg <2 x double> %1 + // CHECK-NEXT: %4 = extractelement <2 x double> %0, i32 0 + // CHECK-NEXT: %5 = extractelement <2 x double> %3, i32 0 + // CHECK-NEXT: %6 = call double @llvm.nvvm.add.rz.d(double %4, double %5) + // CHECK-NEXT: %7 = insertelement <2 x double> poison, double %6, i32 0 + // CHECK-NEXT: %8 = extractelement <2 x double> %0, i32 1 + // CHECK-NEXT: %9 = extractelement <2 x double> %3, i32 1 + // CHECK-NEXT: %10 = call double @llvm.nvvm.add.rz.d(double %8, double %9) + // CHECK-NEXT: %11 = insertelement <2 x double> %7, double %10, i32 1 + // CHECK-NEXT: ret <2 x double> %11 + // CHECK-NEXT: } + %f1 = nvvm.subf %a, %b {rnd = #nvvm.fp_rnd_mode} : vector<2xf64> + llvm.return %f1 : vector<2xf64> +}