[InstCombine] combineLoadToOperationType(): don't fold int<->ptr cast into load

LebedevRI · LebedevRI · commit 544a6aa2674e · 2020-10-11T20:24:28.000+03:00
And another step towards transforms not introducing inttoptr and/or ptrtoint casts that weren't there already. As we've been establishing (see D88788/D88789), if there is a int<->ptr cast, it basically must stay as-is, we can't do much with it. I've looked, and the most source of new such casts being introduces, as far as i can tell, is this transform, which, ironically, tries to reduce count of casts.. On vanilla llvm test-suite + RawSpeed, @ `-O3`, this results in -33.58% less `IntToPtr`s (19014 -> 12629) and +76.20% more `PtrToInt`s (18589 -> 32753), which is an increase of +20.69% in total. However just on RawSpeed, where i know there are basically none `IntToPtr` in the original source code, this results in -99.27% less `IntToPtr`s (2724 -> 20) and +82.92% more `PtrToInt`s (4513 -> 8255). which is again an increase of 14.34% in total. To me this does seem like the step in the right direction, we end up with strictly less `IntToPtr`, but strictly more `PtrToInt`, which seems like a reasonable trade-off. See https://reviews.llvm.org/D88860 / https://reviews.llvm.org/D88995 for some more discussion on the subject. (Eventually, `CastInst::isNoopCast()`/`CastInst::isEliminableCastPair` should be taught about this, yes) Reviewed By: nlopes, nikic Differential Revision: https://reviews.llvm.org/D88979
diff --git a/clang/test/CodeGen/arm64_32-vaarg.c b/clang/test/CodeGen/arm64_32-vaarg.c
@@ -27,20 +27,20 @@ typedef struct {
 
 // Minimum slot size is 4 bytes, so address needs rounding up to multiple of 8.
 long long test_longlong(OneLongLong input, va_list *mylist) {
-// CHECK-LABEL: define i64 @test_longlong(i64 %input
-// CHECK: [[STARTPTR:%.*]] = bitcast i8** %mylist to i32*
-// CHECK: [[START:%.*]] = load i32, i32* [[STARTPTR]]
-
-// CHECK: [[ALIGN_TMP:%.*]] = add i32 [[START]], 7
-// CHECK: [[ALIGNED:%.*]] = and i32 [[ALIGN_TMP]], -8
-// CHECK: [[ALIGNED_ADDR:%.*]] = inttoptr i32 [[ALIGNED]] to i8*
-// CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_ADDR]], i32 8
-// CHECK: store i8* [[NEXT]], i8** %mylist
-
-// CHECK: [[ADDR_STRUCT:%.*]] = inttoptr i32 [[ALIGNED]] to %struct.OneLongLong*
-// CHECK: [[ADDR_I64:%.*]] = getelementptr inbounds %struct.OneLongLong, %struct.OneLongLong* [[ADDR_STRUCT]], i32 0, i32 0
-// CHECK: [[RES:%.*]] = load i64, i64* [[ADDR_I64]]
-// CHECK: ret i64 [[RES]]
+  // CHECK-LABEL: define i64 @test_longlong(i64 %input
+  // CHECK: [[STARTPTR:%.*]] = load i8*, i8** %mylist
+  // CHECK: [[START:%.*]] = ptrtoint i8* [[STARTPTR]] to i32
+
+  // CHECK: [[ALIGN_TMP:%.*]] = add i32 [[START]], 7
+  // CHECK: [[ALIGNED:%.*]] = and i32 [[ALIGN_TMP]], -8
+  // CHECK: [[ALIGNED_ADDR:%.*]] = inttoptr i32 [[ALIGNED]] to i8*
+  // CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, i8* [[ALIGNED_ADDR]], i32 8
+  // CHECK: store i8* [[NEXT]], i8** %mylist
+
+  // CHECK: [[ADDR_STRUCT:%.*]] = inttoptr i32 [[ALIGNED]] to %struct.OneLongLong*
+  // CHECK: [[ADDR_I64:%.*]] = getelementptr inbounds %struct.OneLongLong, %struct.OneLongLong* [[ADDR_STRUCT]], i32 0, i32 0
+  // CHECK: [[RES:%.*]] = load i64, i64* [[ADDR_I64]]
+  // CHECK: ret i64 [[RES]]
 
   return va_arg(*mylist, OneLongLong).a;
 }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -557,12 +557,12 @@ static Instruction *combineLoadToOperationType(InstCombinerImpl &IC,
   const DataLayout &DL = IC.getDataLayout();
 
   // Fold away bit casts of the loaded value by loading the desired type.
-  // We can do this for BitCastInsts as well as casts from and to pointer types,
-  // as long as those are noops (i.e., the source or dest type have the same
-  // bitwidth as the target's pointers).
+  // Note that we should not do this for pointer<->integer casts,
+  // because that would result in type punning.
   if (LI.hasOneUse())
     if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
-      if (CI->isNoopCast(DL))
+      if (CI->isNoopCast(DL) && LI.getType()->isPtrOrPtrVectorTy() ==
+                                    CI->getDestTy()->isPtrOrPtrVectorTy())
         if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
           LoadInst *NewLoad = IC.combineLoadToNewType(LI, CI->getDestTy());
           CI->replaceAllUsesWith(NewLoad);
diff --git a/llvm/test/Transforms/InstCombine/PR30597.ll b/llvm/test/Transforms/InstCombine/PR30597.ll
@@ -23,9 +23,9 @@ entry-block:
 define i64* @function(i64* noalias nocapture readonly dereferenceable(8)) {
 ; CHECK-LABEL: @function(
 ; CHECK-NEXT:  entry-block:
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[TMP0:%.*]] to i64**
-; CHECK-NEXT:    [[LOADED1:%.*]] = load i64*, i64** [[TMP1]], align 8, !nonnull !0
-; CHECK-NEXT:    ret i64* [[LOADED1]]
+; CHECK-NEXT:    [[LOADED:%.*]] = load i64, i64* [[TMP0:%.*]], align 8, [[RNG0:!range !.*]]
+; CHECK-NEXT:    [[INTTOPTR:%.*]] = inttoptr i64 [[LOADED]] to i64*
+; CHECK-NEXT:    ret i64* [[INTTOPTR]]
 ;
 entry-block:
   %loaded = load i64, i64* %0, align 8, !range !1
diff --git a/llvm/test/Transforms/InstCombine/intptr1.ll b/llvm/test/Transforms/InstCombine/intptr1.ll
@@ -7,12 +7,12 @@ define void @test1(float* %a, float* readnone %a_end, i64* %b.i64) {
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult float* [[A:%.*]], [[A_END:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[B_I64:%.*]] to float**
-; CHECK-NEXT:    [[B1:%.*]] = load float*, float** [[TMP0]], align 8
+; CHECK-NEXT:    [[B:%.*]] = load i64, i64* [[B_I64:%.*]], align 8
+; CHECK-NEXT:    [[B_PTR:%.*]] = inttoptr i64 [[B]] to float*
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[A_ADDR_03:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[A]], [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[B1]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[B_PTR]], [[FOR_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[B_ADDR_02_PTR]], align 4
 ; CHECK-NEXT:    [[MUL_I:%.*]] = fmul float [[TMP1]], 4.200000e+01
 ; CHECK-NEXT:    store float [[MUL_I]], float* [[A_ADDR_03]], align 4
@@ -114,11 +114,13 @@ define void @test2(float* %a, float* readnone %a_end, float** %b.float) {
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult float* [[A:%.*]], [[A_END:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body.preheader:
-; CHECK-NEXT:    [[B1:%.*]] = load float*, float** [[B_FLOAT:%.*]], align 8
+; CHECK-NEXT:    [[B_I64:%.*]] = bitcast float** [[B_FLOAT:%.*]] to i64*
+; CHECK-NEXT:    [[B:%.*]] = load i64, i64* [[B_I64]], align 8
+; CHECK-NEXT:    [[B_PTR:%.*]] = inttoptr i64 [[B]] to float*
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[A_ADDR_03:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[A]], [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[B1]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[B_PTR]], [[FOR_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[B_ADDR_02_PTR]], align 4
 ; CHECK-NEXT:    [[MUL_I:%.*]] = fmul float [[TMP1]], 4.200000e+01
 ; CHECK-NEXT:    store float [[MUL_I]], float* [[A_ADDR_03]], align 4
@@ -164,12 +166,13 @@ define void @test3(float* %a, float* readnone %a_end, i8** %b.i8p) {
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult float* [[A:%.*]], [[A_END:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body.preheader:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8** [[B_I8P:%.*]] to float**
-; CHECK-NEXT:    [[B1:%.*]] = load float*, float** [[TMP0]], align 8
+; CHECK-NEXT:    [[B_I64:%.*]] = bitcast i8** [[B_I8P:%.*]] to i64*
+; CHECK-NEXT:    [[B:%.*]] = load i64, i64* [[B_I64]], align 8
+; CHECK-NEXT:    [[B_PTR:%.*]] = inttoptr i64 [[B]] to float*
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[A_ADDR_03:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[A]], [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[B1]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[B_PTR]], [[FOR_BODY_PREHEADER]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[B_ADDR_02_PTR]], align 4
 ; CHECK-NEXT:    [[MUL_I:%.*]] = fmul float [[TMP1]], 4.200000e+01
 ; CHECK-NEXT:    store float [[MUL_I]], float* [[A_ADDR_03]], align 4
@@ -215,15 +218,15 @@ define void @test4(float* %a, float* readnone %a_end, float** %b.float) {
 ; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult float* [[A:%.*]], [[A_END:%.*]]
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body.preheader:
-; CHECK-NEXT:    [[B_F12:%.*]] = load float*, float** [[B_FLOAT:%.*]], align 8
+; CHECK-NEXT:    [[B_F:%.*]] = load float*, float** [[B_FLOAT:%.*]], align 8
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[A_ADDR_03:%.*]] = phi float* [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[A]], [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[B_ADDR_02_PTR:%.*]] = phi float* [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[B_F12]], [[FOR_BODY_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[B_ADDR_02_PTR]], align 4
+; CHECK-NEXT:    [[B_ADDR_02_IN:%.*]] = phi float* [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[B_F]], [[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[B_ADDR_02_IN]], align 4
 ; CHECK-NEXT:    [[MUL_I:%.*]] = fmul float [[TMP1]], 4.200000e+01
 ; CHECK-NEXT:    store float [[MUL_I]], float* [[A_ADDR_03]], align 4
-; CHECK-NEXT:    [[ADD]] = getelementptr inbounds float, float* [[B_ADDR_02_PTR]], i64 1
+; CHECK-NEXT:    [[ADD]] = getelementptr inbounds float, float* [[B_ADDR_02_IN]], i64 1
 ; CHECK-NEXT:    [[INCDEC_PTR]] = getelementptr inbounds float, float* [[A_ADDR_03]], i64 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult float* [[INCDEC_PTR]], [[A_END]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]]
diff --git a/llvm/test/Transforms/InstCombine/load-bitcast32.ll b/llvm/test/Transforms/InstCombine/load-bitcast32.ll
@@ -24,9 +24,10 @@ entry:
 define i32* @test2(i8* %x) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32**
-; CHECK-NEXT:    [[B1:%.*]] = load i32*, i32** [[TMP0]], align 4
-; CHECK-NEXT:    ret i32* [[B1]]
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[C:%.*]] = inttoptr i32 [[B]] to i32*
+; CHECK-NEXT:    ret i32* [[C]]
 ;
 entry:
   %a = bitcast i8* %x to i32*
@@ -39,9 +40,10 @@ entry:
 define i64* @test3(i8* %x) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64**
-; CHECK-NEXT:    [[B1:%.*]] = load i64*, i64** [[TMP0]], align 4
-; CHECK-NEXT:    ret i64* [[B1]]
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[X:%.*]] to i32*
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[C:%.*]] = inttoptr i32 [[B]] to i64*
+; CHECK-NEXT:    ret i64* [[C]]
 ;
 entry:
   %a = bitcast i8* %x to i32*
@@ -54,9 +56,10 @@ entry:
 define i64 @test4(i8* %x) {
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; CHECK-NEXT:    [[B1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[C:%.*]] = zext i32 [[B1]] to i64
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[X:%.*]] to i64**
+; CHECK-NEXT:    [[B:%.*]] = load i64*, i64** [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i64* [[B]] to i32
+; CHECK-NEXT:    [[C:%.*]] = zext i32 [[TMP0]] to i64
 ; CHECK-NEXT:    ret i64 [[C]]
 ;
 entry:
@@ -70,9 +73,10 @@ entry:
 define i32 @test5(i8* %x) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; CHECK-NEXT:    [[B1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    ret i32 [[B1]]
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[X:%.*]] to i32**
+; CHECK-NEXT:    [[B:%.*]] = load i32*, i32** [[A]], align 4
+; CHECK-NEXT:    [[C:%.*]] = ptrtoint i32* [[B]] to i32
+; CHECK-NEXT:    ret i32 [[C]]
 ;
 entry:
   %a = bitcast i8* %x to i32**
@@ -85,9 +89,10 @@ entry:
 define i64 @test6(i8* %x) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i32*
-; CHECK-NEXT:    [[B1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[C:%.*]] = zext i32 [[B1]] to i64
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[X:%.*]] to i32**
+; CHECK-NEXT:    [[B:%.*]] = load i32*, i32** [[A]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i32* [[B]] to i32
+; CHECK-NEXT:    [[C:%.*]] = zext i32 [[TMP0]] to i64
 ; CHECK-NEXT:    ret i64 [[C]]
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/load-bitcast64.ll b/llvm/test/Transforms/InstCombine/load-bitcast64.ll
@@ -7,9 +7,10 @@ target datalayout = "p:64:64:64-i64:32:32"
 define i64* @test1(i8* %x) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64**
-; CHECK-NEXT:    [[B1:%.*]] = load i64*, i64** [[TMP0]], align 4
-; CHECK-NEXT:    ret i64* [[B1]]
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[X:%.*]] to i64*
+; CHECK-NEXT:    [[B:%.*]] = load i64, i64* [[A]], align 4
+; CHECK-NEXT:    [[C:%.*]] = inttoptr i64 [[B]] to i64*
+; CHECK-NEXT:    ret i64* [[C]]
 ;
 entry:
   %a = bitcast i8* %x to i64*
@@ -56,9 +57,10 @@ entry:
 define i64 @test4(i8* %x) {
 ; CHECK-LABEL: @test4(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; CHECK-NEXT:    [[B1:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT:    ret i64 [[B1]]
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[X:%.*]] to i64**
+; CHECK-NEXT:    [[B:%.*]] = load i64*, i64** [[A]], align 8
+; CHECK-NEXT:    [[C:%.*]] = ptrtoint i64* [[B]] to i64
+; CHECK-NEXT:    ret i64 [[C]]
 ;
 entry:
   %a = bitcast i8* %x to i64**
@@ -71,9 +73,10 @@ entry:
 define i32 @test5(i8* %x) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; CHECK-NEXT:    [[B1:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT:    [[C:%.*]] = trunc i64 [[B1]] to i32
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[X:%.*]] to i32**
+; CHECK-NEXT:    [[B:%.*]] = load i32*, i32** [[A]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK-NEXT:    [[C:%.*]] = trunc i64 [[TMP0]] to i32
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
 entry:
@@ -87,9 +90,10 @@ entry:
 define i64 @test6(i8* %x) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[X:%.*]] to i64*
-; CHECK-NEXT:    [[B1:%.*]] = load i64, i64* [[TMP0]], align 8
-; CHECK-NEXT:    ret i64 [[B1]]
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[X:%.*]] to i32**
+; CHECK-NEXT:    [[B:%.*]] = load i32*, i32** [[A]], align 8
+; CHECK-NEXT:    [[C:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK-NEXT:    ret i64 [[C]]
 ;
 entry:
   %a = bitcast i8* %x to i32**
diff --git a/llvm/test/Transforms/InstCombine/memset_chk-1.ll b/llvm/test/Transforms/InstCombine/memset_chk-1.ll
@@ -79,10 +79,10 @@ define i32 @test_rauw(i8* %a, i8* %b, i8** %c) {
 ; CHECK-NEXT:    [[CALL50:%.*]] = call i8* @__memmove_chk(i8* [[B]], i8* [[A]], i64 [[ADD180]], i64 [[YO107]])
 ; CHECK-NEXT:    [[STRLEN:%.*]] = call i64 @strlen(i8* nonnull dereferenceable(1) [[B]])
 ; CHECK-NEXT:    [[STRCHR1:%.*]] = getelementptr i8, i8* [[B]], i64 [[STRLEN]]
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8** [[C:%.*]] to i64*
-; CHECK-NEXT:    [[D2:%.*]] = load i64, i64* [[TMP0]], align 8
+; CHECK-NEXT:    [[D:%.*]] = load i8*, i8** [[C:%.*]], align 8
+; CHECK-NEXT:    [[SUB182:%.*]] = ptrtoint i8* [[D]] to i64
 ; CHECK-NEXT:    [[SUB183:%.*]] = ptrtoint i8* [[B]] to i64
-; CHECK-NEXT:    [[SUB184:%.*]] = sub i64 [[D2]], [[SUB183]]
+; CHECK-NEXT:    [[SUB184:%.*]] = sub i64 [[SUB182]], [[SUB183]]
 ; CHECK-NEXT:    [[ADD52_I_I:%.*]] = add nsw i64 [[SUB184]], 1
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 1 [[STRCHR1]], i8 0, i64 [[ADD52_I_I]], i1 false)
 ; CHECK-NEXT:    ret i32 4