[CostModel] Modify BasicTTI getCastInstrCost

Fix the assumption that all bitcasts of the same type sizes are free. We now only assume that bitcasts between ints and ptrs of the same size are free. This allows TTImpl to just call the concrete implementation of getCastInstrCost. Differential Revision: https://reviews.llvm.org/D78918
llvm · May 13, 2020 · 6bbad72 · 6bbad72
1 parent 49e6c19
commit 6bbad72
Show file tree

Hide file tree

Showing 6 changed files with 54 additions and 52 deletions.
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -871,12 +871,9 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
     case Instruction::IntToPtr:
     case Instruction::PtrToInt:
     case Instruction::Trunc:
-      if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free ||
-          TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free)
-        return TTI::TCC_Free;
-      break;
     case Instruction::BitCast:
-      if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free)
+      if (TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) ==
+          TTI::TCC_Free)
         return TTI::TCC_Free;
       break;
     case Instruction::FPExt:

diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -692,6 +692,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                             TTI::TargetCostKind CostKind,
                             const Instruction *I = nullptr) {
+    if (BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I) == 0)
+      return 0;
+
     const TargetLoweringBase *TLI = getTLI();
     int ISD = TLI->InstructionOpcodeToISD(Opcode);
     assert(ISD && "Invalid opcode");
@@ -700,6 +703,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
     unsigned SrcSize = SrcLT.second.getSizeInBits();
     unsigned DstSize = DstLT.second.getSizeInBits();
+    bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
+    bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();
 
     switch (Opcode) {
     default:
@@ -710,8 +715,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
         return 0;
       LLVM_FALLTHROUGH;
     case Instruction::BitCast:
-      // Bitcast between types that are legalized to the same type are free.
-      if (SrcLT.first == DstLT.first && SrcSize == DstSize)
+      // Bitcast between types that are legalized to the same type are free and
+      // assume int to/from ptr of the same size is also free.
+      if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
+          SrcSize == DstSize)
         return 0;
       break;
     case Instruction::ZExt:
@@ -748,10 +755,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
 
     // Handle scalar conversions.
     if (!SrcVTy && !DstVTy) {
-      // Scalar bitcasts are usually free.
-      if (Opcode == Instruction::BitCast)
-        return 0;
-
       // Just check the op cost. If the operation is legal then assume it costs
       // 1.
       if (!TLI->isOperationExpand(ISD, DstLT.second))

diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll
@@ -677,12 +677,12 @@ define i32 @bitcasts() {
 ; CHECK-LABEL: 'bitcasts'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %a = bitcast i32 undef to i32

diff --git a/llvm/test/Analysis/CostModel/ARM/cast.ll b/llvm/test/Analysis/CostModel/ARM/cast.ll
@@ -1868,56 +1868,56 @@ define i32 @bitcasts() {
 ; CHECK-NEON-LABEL: 'bitcasts'
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %g = bitcast half undef to i16
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-MVE-LABEL: 'bitcasts'
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-MAIN-LABEL: 'bitcasts'
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %g = bitcast half undef to i16
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8M-BASE-LABEL: 'bitcasts'
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %g = bitcast half undef to i16
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %e = bitcast i64 undef to double
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f = bitcast double undef to i64
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-V8R-LABEL: 'bitcasts'
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %a = bitcast i32 undef to i32
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %b = bitcast float undef to float
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %c = bitcast i32 undef to float
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %d = bitcast float undef to i32
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %e = bitcast i64 undef to double
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %f = bitcast double undef to i64
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %g = bitcast half undef to i16
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %h = bitcast i16 undef to half
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %c = bitcast i32 undef to float
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %d = bitcast float undef to i32
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %e = bitcast i64 undef to double
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f = bitcast double undef to i64
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %g = bitcast half undef to i16
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %h = bitcast i16 undef to half
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %a = bitcast i32 undef to i32

diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -1571,7 +1571,7 @@ define void @test_scatter_16i32(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i3
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
-; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %imask = bitcast i16 %mask to <16 x i1>
+; AVX512-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1>
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
 ; AVX512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-merge.ll
@@ -111,13 +111,15 @@ define <4 x float> @PR16739_byval(<4 x float>* nocapture readonly dereferenceabl
 ; CHECK-NEXT:    [[T2:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[X]], i64 0, i64 2
 ; CHECK-NEXT:    [[T3:%.*]] = bitcast float* [[T2]] to i64*
 ; CHECK-NEXT:    [[T4:%.*]] = load i64, i64* [[T3]], align 8
-; CHECK-NEXT:    [[T5:%.*]] = trunc i64 [[T1]] to i32
-; CHECK-NEXT:    [[T6:%.*]] = bitcast i32 [[T5]] to float
-; CHECK-NEXT:    [[T7:%.*]] = insertelement <4 x float> undef, float [[T6]], i32 0
 ; CHECK-NEXT:    [[T8:%.*]] = lshr i64 [[T1]], 32
-; CHECK-NEXT:    [[T9:%.*]] = trunc i64 [[T8]] to i32
-; CHECK-NEXT:    [[T10:%.*]] = bitcast i32 [[T9]] to float
-; CHECK-NEXT:    [[T11:%.*]] = insertelement <4 x float> [[T7]], float [[T10]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> undef, i64 [[T1]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[T8]], i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <2 x float>
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
+; CHECK-NEXT:    [[T7:%.*]] = insertelement <4 x float> undef, float [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
+; CHECK-NEXT:    [[T11:%.*]] = insertelement <4 x float> [[T7]], float [[TMP6]], i32 1
 ; CHECK-NEXT:    [[T12:%.*]] = trunc i64 [[T4]] to i32
 ; CHECK-NEXT:    [[T13:%.*]] = bitcast i32 [[T12]] to float
 ; CHECK-NEXT:    [[T14:%.*]] = insertelement <4 x float> [[T11]], float [[T13]], i32 2