diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 197aae6e03cb1..c4afff5f4cc6c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -4163,12 +4163,15 @@ InstructionCost AArch64TTIImpl::getScalarizationOverhead( std::optional AArch64TTIImpl::getFP16BF16PromoteCost( Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, - TTI::OperandValueInfo Op2Info, bool IncludeTrunc, + TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function InstCost) const { if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy()) return std::nullopt; if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16()) return std::nullopt; + if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() && + ST->isNonStreamingSVEorSME2Available()) + return std::nullopt; Type *PromotedTy = Ty->getWithNewType(Type::getFloatTy(Ty->getContext())); InstructionCost Cost = getCastInstrCost(Instruction::FPExt, PromotedTy, Ty, @@ -4210,6 +4213,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( ISD == ISD::FDIV || ISD == ISD::FREM) if (auto PromotedCost = getFP16BF16PromoteCost( Ty, CostKind, Op1Info, Op2Info, /*IncludeTrunc=*/true, + // There is no native support for fdiv/frem even with +sve-b16b16. + /*CanUseSVE=*/ISD != ISD::FDIV && ISD != ISD::FREM, [&](Type *PromotedTy) { return getArithmeticInstrCost(Opcode, PromotedTy, CostKind, Op1Info, Op2Info); @@ -4624,7 +4629,8 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost( if (Opcode == Instruction::FCmp) { if (auto PromotedCost = getFP16BF16PromoteCost( ValTy, CostKind, Op1Info, Op2Info, /*IncludeTrunc=*/false, - [&](Type *PromotedTy) { + // TODO: Consider costing SVE FCMPs. 
+ /*CanUseSVE=*/false, [&](Type *PromotedTy) { InstructionCost Cost = getCmpSelInstrCost(Opcode, PromotedTy, CondTy, VecPred, CostKind, Op1Info, Op2Info); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index e62fdb6786843..d189f563f99a1 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -456,11 +456,10 @@ class AArch64TTIImpl final : public BasicTTIImplBase { /// FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the /// architecture features are not present. - std::optional - getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, - TTI::OperandValueInfo Op1Info, - TTI::OperandValueInfo Op2Info, bool IncludeTrunc, - std::function InstCost) const; + std::optional getFP16BF16PromoteCost( + Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, + TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, + std::function InstCost) const; InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll index d9e26dc47b53f..a735640311ff6 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll @@ -33,11 +33,17 @@ define void @fadd() { } define void @fadd_bf16() { -; CHECK-LABEL: 'fadd_bf16' -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd poison, poison -; CHECK-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd poison, poison -; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd poison, poison -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-BASE-LABEL: 'fadd_bf16' +; CHECK-BASE-NEXT: Cost 
Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-BF16-LABEL: 'fadd_bf16' +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %NXV4BF16 = fadd poison, poison %NXV8BF16 = fadd poison, poison @@ -76,11 +82,17 @@ define void @fsub() { } define void @fsub_bf16() { -; CHECK-LABEL: 'fsub_bf16' -; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub poison, poison -; CHECK-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub poison, poison -; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub poison, poison -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-BASE-LABEL: 'fsub_bf16' +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 
for: ret void +; +; CHECK-BF16-LABEL: 'fsub_bf16' +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %NXV4BF16 = fsub poison, poison %NXV8BF16 = fsub poison, poison @@ -160,11 +172,17 @@ define void @fmul() { } define void @fmul_bf16() { -; CHECK-LABEL: 'fmul_bf16' -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul poison, poison -; CHECK-NEXT: Cost Model: Found costs of RThru:29 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul poison, poison -; CHECK-NEXT: Cost Model: Found costs of RThru:58 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul poison, poison -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-BASE-LABEL: 'fmul_bf16' +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:29 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:58 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul poison, poison +; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-BF16-LABEL: 'fmul_bf16' +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul poison, poison +; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul poison, poison +; 
CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %NXV4BF16 = fmul poison, poison %NXV8BF16 = fmul poison, poison