diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll index 616f02ec13601..b91f20b710a2d 100644 --- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -opaque-pointers=0 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr9 < %s | FileCheck %s -define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7) { +define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, i64 %7) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpd 5, 7 @@ -43,20 +43,20 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5 ; CHECK-NEXT: cmpd 6, 27 ; CHECK-NEXT: bge 0, .LBB0_2 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: add 23, 6, 12 -; CHECK-NEXT: add 22, 6, 30 -; CHECK-NEXT: add 25, 6, 28 +; CHECK-NEXT: add 25, 6, 12 ; CHECK-NEXT: add 24, 6, 8 ; CHECK-NEXT: sldi 26, 6, 3 -; CHECK-NEXT: sldi 25, 25, 3 +; CHECK-NEXT: sldi 23, 25, 3 +; CHECK-NEXT: add 25, 6, 30 ; CHECK-NEXT: sldi 24, 24, 3 -; CHECK-NEXT: sldi 23, 23, 3 -; CHECK-NEXT: sldi 22, 22, 3 ; CHECK-NEXT: add 26, 4, 26 -; CHECK-NEXT: add 25, 29, 25 +; CHECK-NEXT: sldi 22, 25, 3 +; CHECK-NEXT: add 25, 6, 28 ; CHECK-NEXT: add 24, 29, 24 ; CHECK-NEXT: add 23, 3, 23 +; CHECK-NEXT: sldi 25, 25, 3 ; CHECK-NEXT: add 22, 3, 22 +; CHECK-NEXT: add 25, 29, 25 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 @@ -136,73 +136,73 @@ define void @foo(double* readonly %0, double* %1, i64 %2, i64 %3, i64 %4, i64 %5 23: ; preds = %15, %23 %24 = phi i64 [ %12, %15 ], [ %91, %23 ] - %25 = getelementptr inbounds double, double* %1, i64 %24 - %26 = load double, double* %25, align 8 + %25 = getelementptr inbounds double, ptr %1, i64 %24 + %26 = load double, ptr %25, align 8 %27 = add nsw i64 %24, %16 - %28 = getelementptr inbounds double, double* %0, i64 %27 - %29 = load double, double* %28, align 8 + %28 = getelementptr inbounds double, ptr %0, i64 %27 + %29 = load double, ptr %28, align 8 %30 = fadd double %26, %29 %31 = add nsw i64 %27, 1 - %32 = getelementptr inbounds double, double* %0, i64 %31 - %33 = load double, double* %32, align 8 + %32 = getelementptr inbounds double, ptr %0, i64 %31 + %33 = load double, ptr %32, align 8 %34 = fadd double %30, %33 %35 = add nsw i64 %27, 2 - %36 = getelementptr inbounds double, double* %0, i64 %35 - %37 = load double, double* %36, align 8 + %36 = getelementptr inbounds double, ptr %0, i64 %35 + %37 = load double, ptr %36, align 8 %38 = fadd double %34, %37 %39 = add nsw i64 %27, 3 - %40 = getelementptr inbounds double, double* %0, i64 %39 - %41 = load double, double* %40, align 8 + %40 = getelementptr inbounds double, ptr %0, i64 %39 + %41 = load double, ptr %40, align 8 %42 = fadd double %38, %41 %43 = add nsw i64 %24, %18 - %44 = getelementptr inbounds double, double* %0, i64 %43 - %45 = load double, double* %44, align 8 + %44 = getelementptr inbounds double, ptr %0, i64 %43 + %45 = load double, ptr %44, align 8 %46 = fadd double %42, %45 %47 = add nsw i64 %43, 1 - %48 = getelementptr inbounds double, double* %0, i64 %47 - %49 = load double, double* %48, align 8 + %48 = getelementptr inbounds double, ptr %0, i64 %47 + %49 = load double, ptr %48, align 8 %50 = fadd double %46, %49 %51 = add nsw i64 %43, 2 - %52 = getelementptr inbounds double, double* %0, i64 %51 - %53 = load double, double* %52, align 8 + %52 = getelementptr inbounds double, ptr %0, i64 %51 + %53 = load double, ptr %52, align 8 %54 = fadd double %50, %53 %55 = add nsw i64 %43, 3 - %56 = getelementptr inbounds double, double* %0, i64 %55 - %57 = load double, double* %56, align 8 + %56 = getelementptr inbounds double, ptr %0, i64 %55 + %57 = load double, ptr %56, align 8 %58 = fadd double %54, %57 %59 = add nsw i64 %24, %20 - %60 = getelementptr inbounds double, double* %0, i64 %59 - %61 = load double, double* %60, align 8 + %60 = getelementptr inbounds double, ptr %0, i64 %59 + %61 = load double, ptr %60, align 8 %62 = fadd double %58, %61 %63 = add nsw i64 %59, 1 - %64 = getelementptr inbounds double, double* %0, i64 %63 - %65 = load double, double* %64, align 8 + %64 = getelementptr inbounds double, ptr %0, i64 %63 + %65 = load double, ptr %64, align 8 %66 = fadd double %62, %65 %67 = add nsw i64 %59, 2 - %68 = getelementptr inbounds double, double* %0, i64 %67 - %69 = load double, double* %68, align 8 + %68 = getelementptr inbounds double, ptr %0, i64 %67 + %69 = load double, ptr %68, align 8 %70 = fadd double %66, %69 %71 = add nsw i64 %59, 3 - %72 = getelementptr inbounds double, double* %0, i64 %71 - %73 = load double, double* %72, align 8 + %72 = getelementptr inbounds double, ptr %0, i64 %71 + %73 = load double, ptr %72, align 8 %74 = fadd double %70, %73 %75 = add nsw i64 %24, %22 - %76 = getelementptr inbounds double, double* %0, i64 %75 - %77 = load double, double* %76, align 8 + %76 = getelementptr inbounds double, ptr %0, i64 %75 + %77 = load double, ptr %76, align 8 %78 = fadd double %74, %77 %79 = add nsw i64 %75, 1 - %80 = getelementptr inbounds double, double* %0, i64 %79 - %81 = load double, double* %80, align 8 + %80 = getelementptr inbounds double, ptr %0, i64 %79 + %81 = load double, ptr %80, align 8 %82 = fadd double %78, %81 %83 = add nsw i64 %75, 2 - %84 = getelementptr inbounds double, double* %0, i64 %83 - %85 = load double, double* %84, align 8 + %84 = getelementptr inbounds double, ptr %0, i64 %83 + %85 = load double, ptr %84, align 8 %86 = fadd double %82, %85 %87 = add nsw i64 %75, 3 - %88 = getelementptr inbounds double, double* %0, i64 %87 - %89 = load double, double* %88, align 8 + %88 = getelementptr inbounds double, ptr %0, i64 %87 + %89 = load double, ptr %88, align 8 %90 = fadd double %86, %89 - store double %90, double* %25, align 8 + store double %90, ptr %25, align 8 %91 = add nsw i64 %24, %7 %92 = icmp slt i64 %91, %13 br i1 %92, label %23, label %93 diff --git a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll index 36ac5c7f9c8f0..b209d5809c573 100644 --- a/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll +++ b/llvm/test/CodeGen/PowerPC/memcmp-mergeexpand.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -opaque-pointers=0 -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=PPC64LE +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-gnu-linux < %s | FileCheck %s -check-prefix=PPC64LE ; This tests interaction between MergeICmp and ExpandMemCmp. @@ -10,25 +10,24 @@ define zeroext i1 @opeq1( ; PPC64LE: # %bb.0: # %"entry+land.rhs.i" ; PPC64LE-NEXT: ld 3, 0(3) ; PPC64LE-NEXT: ld 4, 0(4) -; PPC64LE-NEXT: xor 3, 3, 4 -; PPC64LE-NEXT: cntlzd 3, 3 -; PPC64LE-NEXT: rldicl 3, 3, 58, 63 +; PPC64LE-NEXT: cmpd 3, 4 +; PPC64LE-NEXT: li 3, 0 +; PPC64LE-NEXT: li 4, 1 +; PPC64LE-NEXT: iseleq 3, 4, 3 ; PPC64LE-NEXT: blr - %"struct.std::pair"* nocapture readonly dereferenceable(8) %a, - %"struct.std::pair"* nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { + ptr nocapture readonly dereferenceable(8) %a, + ptr nocapture readonly dereferenceable(8) %b) local_unnamed_addr #0 { entry: - %first.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 0 - %0 = load i32, i32* %first.i, align 4 - %first1.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 0 - %1 = load i32, i32* %first1.i, align 4 + %0 = load i32, ptr %a, align 4 + %1 = load i32, ptr %b, align 4 %cmp.i = icmp eq i32 %0, %1 br i1 %cmp.i, label %land.rhs.i, label %opeq1.exit land.rhs.i: - %second.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %a, i64 0, i32 1 - %2 = load i32, i32* %second.i, align 4 - %second2.i = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %b, i64 0, i32 1 - %3 = load i32, i32* %second2.i, align 4 + %second.i = getelementptr inbounds %"struct.std::pair", ptr %a, i64 0, i32 1 + %2 = load i32, ptr %second.i, align 4 + %second2.i = getelementptr inbounds %"struct.std::pair", ptr %b, i64 0, i32 1 + %3 = load i32, ptr %second2.i, align 4 %cmp3.i = icmp eq i32 %2, %3 br label %opeq1.exit diff --git a/llvm/test/CodeGen/PowerPC/pr47373.ll b/llvm/test/CodeGen/PowerPC/pr47373.ll index e56197f493df7..26a299ec9c2ad 100644 --- a/llvm/test/CodeGen/PowerPC/pr47373.ll +++ b/llvm/test/CodeGen/PowerPC/pr47373.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -opaque-pointers=0 -mtriple=powerpc64-unknown-freebsd13.0 -verify-machineinstrs \ +; RUN: llc -mtriple=powerpc64-unknown-freebsd13.0 -verify-machineinstrs \ ; RUN: -mcpu=ppc64 -ppc-asm-full-reg-names < %s | FileCheck %s -@a = local_unnamed_addr global float* null, align 8 +@a = local_unnamed_addr global ptr null, align 8 ; Function Attrs: nounwind define void @d() local_unnamed_addr #0 { @@ -40,9 +40,9 @@ define void @d() local_unnamed_addr #0 { ; CHECK-NEXT: rlwinm r5, r4, 0, 0, 29 ; CHECK-NEXT: li r7, 15 ; CHECK-NEXT: addi r6, r5, -4 -; CHECK-NEXT: addi r8, r1, 144 +; CHECK-NEXT: addi r8, r1, 128 ; CHECK-NEXT: rldicl r6, r6, 62, 2 -; CHECK-NEXT: addi r9, r1, 128 +; CHECK-NEXT: addi r9, r1, 144 ; CHECK-NEXT: addi r6, r6, 1 ; CHECK-NEXT: addi r10, r1, 160 ; CHECK-NEXT: mtctr r6 @@ -62,20 +62,20 @@ define void @d() local_unnamed_addr #0 { ; CHECK-NEXT: stvx v2, 0, r8 ; CHECK-NEXT: vperm v2, v3, v5, v4 ; CHECK-NEXT: stvx v2, 0, r9 -; CHECK-NEXT: lfs f0, 156(r1) -; CHECK-NEXT: lfs f1, 140(r1) +; CHECK-NEXT: lfs f0, 140(r1) +; CHECK-NEXT: lfs f1, 156(r1) ; CHECK-NEXT: fdivs f0, f1, f0 -; CHECK-NEXT: lfs f1, 136(r1) +; CHECK-NEXT: lfs f1, 152(r1) ; CHECK-NEXT: stfs f0, 172(r1) -; CHECK-NEXT: lfs f0, 152(r1) +; CHECK-NEXT: lfs f0, 136(r1) ; CHECK-NEXT: fdivs f0, f1, f0 -; CHECK-NEXT: lfs f1, 132(r1) +; CHECK-NEXT: lfs f1, 148(r1) ; CHECK-NEXT: stfs f0, 168(r1) -; CHECK-NEXT: lfs f0, 148(r1) +; CHECK-NEXT: lfs f0, 132(r1) ; CHECK-NEXT: fdivs f0, f1, f0 -; CHECK-NEXT: lfs f1, 128(r1) +; CHECK-NEXT: lfs f1, 144(r1) ; CHECK-NEXT: stfs f0, 164(r1) -; CHECK-NEXT: lfs f0, 144(r1) +; CHECK-NEXT: lfs f0, 128(r1) ; CHECK-NEXT: fdivs f0, f1, f0 ; CHECK-NEXT: stfs f0, 160(r1) ; CHECK-NEXT: lvx v2, 0, r10 @@ -111,9 +111,9 @@ define void @d() local_unnamed_addr #0 { ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr entry: - %0 = load float*, float** @a, align 8 - %call = call signext i32 bitcast (i32 (...)* @c to i32 ()*)() #2 - %call1 = call float* bitcast (float* (...)* @b to float* ()*)() #2 + %0 = load ptr, ptr @a, align 8 + %call = call signext i32 @c() #2 + %call1 = call ptr @b() #2 %cmp11 = icmp sgt i32 %call, 0 br i1 %cmp11, label %for.body.preheader, label %for.end @@ -123,10 +123,10 @@ for.body.preheader: ; preds = %entry br i1 %min.iters.check, label %for.body.preheader18, label %vector.memcheck vector.memcheck: ; preds = %for.body.preheader - %scevgep = getelementptr float, float* %0, i64 %wide.trip.count - %scevgep15 = getelementptr float, float* %call1, i64 %wide.trip.count - %bound0 = icmp ult float* %0, %scevgep15 - %bound1 = icmp ult float* %call1, %scevgep + %scevgep = getelementptr float, ptr %0, i64 %wide.trip.count + %scevgep15 = getelementptr float, ptr %call1, i64 %wide.trip.count + %bound0 = icmp ult ptr %0, %scevgep15 + %bound1 = icmp ult ptr %call1, %scevgep %found.conflict = and i1 %bound0, %bound1 br i1 %found.conflict, label %for.body.preheader18, label %vector.ph @@ -136,18 +136,15 @@ vector.ph: ; preds = %vector.memcheck vector.body: ; preds = %vector.body, %vector.ph %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %1 = getelementptr inbounds float, float* %call1, i64 %index - %2 = bitcast float* %1 to <4 x float>* - %wide.load = load <4 x float>, <4 x float>* %2, align 4 - %3 = getelementptr inbounds float, float* %0, i64 %index - %4 = bitcast float* %3 to <4 x float>* - %wide.load17 = load <4 x float>, <4 x float>* %4, align 4 - %5 = fdiv reassoc nsz arcp afn <4 x float> %wide.load17, %wide.load - %6 = bitcast float* %3 to <4 x float>* - store <4 x float> %5, <4 x float>* %6, align 4 + %1 = getelementptr inbounds float, ptr %call1, i64 %index + %wide.load = load <4 x float>, ptr %1, align 4 + %2 = getelementptr inbounds float, ptr %0, i64 %index + %wide.load17 = load <4 x float>, ptr %2, align 4 + %3 = fdiv reassoc nsz arcp afn <4 x float> %wide.load17, %wide.load + store <4 x float> %3, ptr %2, align 4 %index.next = add i64 %index, 4 - %7 = icmp eq i64 %index.next, %n.vec - br i1 %7, label %middle.block, label %vector.body + %4 = icmp eq i64 %index.next, %n.vec + br i1 %4, label %middle.block, label %vector.body middle.block: ; preds = %vector.body %cmp.n = icmp eq i64 %n.vec, %wide.trip.count @@ -159,12 +156,12 @@ for.body.preheader18: ; preds = %middle.block, %vect for.body: ; preds = %for.body.preheader18, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader18 ] - %arrayidx = getelementptr inbounds float, float* %call1, i64 %indvars.iv - %8 = load float, float* %arrayidx, align 4 - %arrayidx3 = getelementptr inbounds float, float* %0, i64 %indvars.iv - %9 = load float, float* %arrayidx3, align 4 - %div = fdiv reassoc nsz arcp afn float %9, %8 - store float %div, float* %arrayidx3, align 4 + %arrayidx = getelementptr inbounds float, ptr %call1, i64 %indvars.iv + %5 = load float, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds float, ptr %0, i64 %indvars.iv + %6 = load float, ptr %arrayidx3, align 4 + %div = fdiv reassoc nsz arcp afn float %6, %5 + store float %div, ptr %arrayidx3, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %for.end, label %for.body @@ -175,6 +172,6 @@ for.end: ; preds = %for.body, %middle.b declare signext i32 @c(...) local_unnamed_addr #1 -declare float* @b(...) local_unnamed_addr #1 +declare ptr @b(...) local_unnamed_addr #1 attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll index 27f2438bea8c6..912a74ba8df8f 100644 --- a/llvm/test/CodeGen/PowerPC/prefer-dqform.ll +++ b/llvm/test/CodeGen/PowerPC/prefer-dqform.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -opaque-pointers=0 -verify-machineinstrs -disable-ppc-instr-form-prep=true -mcpu=pwr9 < %s \ +; RUN: llc -verify-machineinstrs -disable-ppc-instr-form-prep=true -mcpu=pwr9 < %s \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck %s -check-prefix=CHECK-P9 -; RUN: llc -opaque-pointers=0 -verify-machineinstrs -disable-ppc-instr-form-prep=true -mcpu=pwr10 < %s \ +; RUN: llc -verify-machineinstrs -disable-ppc-instr-form-prep=true -mcpu=pwr10 < %s \ ; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck %s -check-prefix=CHECK-P10 target triple = "powerpc64le-unknown-linux-gnu" @@ -10,7 +10,7 @@ target triple = "powerpc64le-unknown-linux-gnu" %_elem_type_of_x = type <{ double }> %_elem_type_of_y = type <{ double }> -define void @test(i32* dereferenceable(4) %.ial, i32* noalias dereferenceable(4) %.m, i32* noalias dereferenceable(4) %.n, [0 x %_elem_type_of_a]* %.a, i32* noalias dereferenceable(4) %.lda, [0 x %_elem_type_of_x]* noalias %.x, [0 x %_elem_type_of_y]* noalias %.y) { +define void @test(ptr dereferenceable(4) %.ial, ptr noalias dereferenceable(4) %.m, ptr noalias dereferenceable(4) %.n, ptr %.a, ptr noalias dereferenceable(4) %.lda, ptr noalias %.x, ptr noalias %.y) { ; CHECK-P9-LABEL: test: ; CHECK-P9: # %bb.0: # %test_entry ; CHECK-P9-NEXT: andi. r3, r6, 15 @@ -31,19 +31,19 @@ define void @test(i32* dereferenceable(4) %.ial, i32* noalias dereferenceable(4) ; CHECK-P9-NEXT: bgtlr cr0 ; CHECK-P9-NEXT: # %bb.1: # %_loop_2_do_.lr.ph ; CHECK-P9-NEXT: extswsli r5, r5, 3 -; CHECK-P9-NEXT: extsw r10, r4 -; CHECK-P9-NEXT: lwa r4, 0(r7) ; CHECK-P9-NEXT: add r5, r8, r5 ; CHECK-P9-NEXT: addi r8, r5, -8 -; CHECK-P9-NEXT: addi r5, r3, 1 -; CHECK-P9-NEXT: sub r3, r10, r3 -; CHECK-P9-NEXT: rldicl r3, r3, 60, 4 +; CHECK-P9-NEXT: lwz r5, 0(r7) +; CHECK-P9-NEXT: extsw r7, r4 +; CHECK-P9-NEXT: rldic r4, r3, 3, 29 +; CHECK-P9-NEXT: sub r3, r7, r3 +; CHECK-P9-NEXT: addi r10, r4, 8 ; CHECK-P9-NEXT: lxvdsx vs0, 0, r8 -; CHECK-P9-NEXT: sub r4, r5, r4 -; CHECK-P9-NEXT: sldi r5, r5, 3 +; CHECK-P9-NEXT: rldicl r3, r3, 60, 4 +; CHECK-P9-NEXT: extswsli r5, r5, 3 ; CHECK-P9-NEXT: addi r3, r3, 1 -; CHECK-P9-NEXT: sldi r4, r4, 3 -; CHECK-P9-NEXT: add r5, r9, r5 +; CHECK-P9-NEXT: sub r4, r10, r5 +; CHECK-P9-NEXT: add r5, r9, r10 ; CHECK-P9-NEXT: mtctr r3 ; CHECK-P9-NEXT: add r4, r6, r4 ; CHECK-P9-NEXT: .p2align 4 @@ -83,21 +83,21 @@ define void @test(i32* dereferenceable(4) %.ial, i32* noalias dereferenceable(4) ; CHECK-P10-NEXT: bgtlr cr0 ; CHECK-P10-NEXT: # %bb.1: # %_loop_2_do_.lr.ph ; CHECK-P10-NEXT: extswsli r5, r5, 3 -; CHECK-P10-NEXT: extsw r10, r4 -; CHECK-P10-NEXT: lwa r4, 0(r7) ; CHECK-P10-NEXT: add r5, r8, r5 ; CHECK-P10-NEXT: addi r8, r5, -8 -; CHECK-P10-NEXT: addi r5, r3, 1 -; CHECK-P10-NEXT: sub r3, r10, r3 -; CHECK-P10-NEXT: sub r4, r5, r4 -; CHECK-P10-NEXT: rldicl r3, r3, 60, 4 -; CHECK-P10-NEXT: sldi r5, r5, 3 -; CHECK-P10-NEXT: add r5, r9, r5 +; CHECK-P10-NEXT: lwz r5, 0(r7) +; CHECK-P10-NEXT: extsw r7, r4 +; CHECK-P10-NEXT: rldic r4, r3, 3, 29 +; CHECK-P10-NEXT: addi r10, r4, 8 +; CHECK-P10-NEXT: sub r3, r7, r3 ; CHECK-P10-NEXT: lxvdsx vs0, 0, r8 +; CHECK-P10-NEXT: rldicl r3, r3, 60, 4 +; CHECK-P10-NEXT: extswsli r5, r5, 3 ; CHECK-P10-NEXT: addi r3, r3, 1 -; CHECK-P10-NEXT: sldi r4, r4, 3 -; CHECK-P10-NEXT: add r4, r6, r4 +; CHECK-P10-NEXT: sub r4, r10, r5 +; CHECK-P10-NEXT: add r5, r9, r10 ; CHECK-P10-NEXT: mtctr r3 +; CHECK-P10-NEXT: add r4, r6, r4 ; CHECK-P10-NEXT: .p2align 4 ; CHECK-P10-NEXT: .LBB0_2: # %_loop_2_do_ ; CHECK-P10-NEXT: # @@ -116,34 +116,33 @@ define void @test(i32* dereferenceable(4) %.ial, i32* noalias dereferenceable(4) ; CHECK-P10-NEXT: blr ; FIXME: use pair load/store instructions lxvp/stxvp test_entry: - %_conv5 = ptrtoint [0 x %_elem_type_of_a]* %.a to i64 + %_conv5 = ptrtoint ptr %.a to i64 %_andi_tmp = and i64 %_conv5, 15 %_equ_tmp = icmp eq i64 %_andi_tmp, 0 %. = select i1 %_equ_tmp, i32 1, i32 2 - %_val_m_ = load i32, i32* %.m, align 4 + %_val_m_ = load i32, ptr %.m, align 4 %_sub_tmp9 = sub nsw i32 1, %. %_add_tmp10 = add i32 %_sub_tmp9, %_val_m_ %_mod_tmp = srem i32 %_add_tmp10, 16 %_sub_tmp11 = sub i32 %_val_m_, %_mod_tmp - %_val_n_ = load i32, i32* %.n, align 4 - %x_rvo_based_addr_17 = getelementptr inbounds [0 x %_elem_type_of_x], [0 x %_elem_type_of_x]* %.x, i64 0, i64 -1 + %_val_n_ = load i32, ptr %.n, align 4 + %x_rvo_based_addr_17 = getelementptr inbounds [0 x %_elem_type_of_x], ptr %.x, i64 0, i64 -1 %_div_tmp = sdiv i32 %_val_n_, 2 %_conv16 = sext i32 %_div_tmp to i64 - %_ind_cast = getelementptr inbounds %_elem_type_of_x, %_elem_type_of_x* %x_rvo_based_addr_17, i64 %_conv16, i32 0 - %_val_x_ = load double, double* %_ind_cast, align 8 + %_ind_cast = getelementptr inbounds %_elem_type_of_x, ptr %x_rvo_based_addr_17, i64 %_conv16, i32 0 + %_val_x_ = load double, ptr %_ind_cast, align 8 %.splatinsert = insertelement <2 x double> undef, double %_val_x_, i32 0 %.splat = shufflevector <2 x double> %.splatinsert, <2 x double> undef, <2 x i32> zeroinitializer %_grt_tmp21 = icmp sgt i32 %., %_sub_tmp11 br i1 %_grt_tmp21, label %_return_bb, label %_loop_2_do_.lr.ph _loop_2_do_.lr.ph: ; preds = %test_entry - %_val_lda_ = load i32, i32* %.lda, align 4 + %_val_lda_ = load i32, ptr %.lda, align 4 %_conv = sext i32 %_val_lda_ to i64 %_mult_tmp = shl nsw i64 %_conv, 3 %_sub_tmp4 = sub nuw nsw i64 -8, %_mult_tmp - %y_rvo_based_addr_19 = getelementptr inbounds [0 x %_elem_type_of_y], [0 x %_elem_type_of_y]* %.y, i64 0, i64 -1 - %a_byte_ptr_ = bitcast [0 x %_elem_type_of_a]* %.a to i8* - %a_rvo_based_addr_ = getelementptr inbounds i8, i8* %a_byte_ptr_, i64 %_sub_tmp4 + %y_rvo_based_addr_19 = getelementptr inbounds [0 x %_elem_type_of_y], ptr %.y, i64 0, i64 -1 + %a_rvo_based_addr_ = getelementptr inbounds i8, ptr %.a, i64 %_sub_tmp4 %0 = zext i32 %. to i64 %1 = sext i32 %_sub_tmp11 to i64 br label %_loop_2_do_ @@ -151,22 +150,18 @@ _loop_2_do_.lr.ph: ; preds = %test_entry _loop_2_do_: ; preds = %_loop_2_do_.lr.ph, %_loop_2_do_ %indvars.iv = phi i64 [ %0, %_loop_2_do_.lr.ph ], [ %indvars.iv.next, %_loop_2_do_ ] %_ix_x_len19 = shl nuw nsw i64 %indvars.iv, 3 - %y_ix_dim_0_20 = getelementptr inbounds %_elem_type_of_y, %_elem_type_of_y* %y_rvo_based_addr_19, i64 %indvars.iv - %2 = bitcast %_elem_type_of_y* %y_ix_dim_0_20 to <2 x double>* - %3 = load <2 x double>, <2 x double>* %2, align 1 - %4 = getelementptr %_elem_type_of_y, %_elem_type_of_y* %y_ix_dim_0_20, i64 2 - %5 = bitcast %_elem_type_of_y* %4 to <2 x double>* - %6 = load <2 x double>, <2 x double>* %5, align 1 - %a_ix_dim_1_ = getelementptr inbounds i8, i8* %a_rvo_based_addr_, i64 %_ix_x_len19 - %7 = bitcast i8* %a_ix_dim_1_ to <2 x double>* - %8 = load <2 x double>, <2 x double>* %7, align 1 - %9 = getelementptr i8, i8* %a_ix_dim_1_, i64 16 - %10 = bitcast i8* %9 to <2 x double>* - %11 = load <2 x double>, <2 x double>* %10, align 1 - %12 = tail call nsz contract <2 x double> @llvm.fma.v2f64(<2 x double> %8, <2 x double> %3, <2 x double> %3) - %13 = tail call nsz contract <2 x double> @llvm.fma.v2f64(<2 x double> %11, <2 x double> %.splat, <2 x double> %6) - store <2 x double> %12, <2 x double>* %2, align 1 - store <2 x double> %13, <2 x double>* %5, align 1 + %y_ix_dim_0_20 = getelementptr inbounds %_elem_type_of_y, ptr %y_rvo_based_addr_19, i64 %indvars.iv + %2 = load <2 x double>, ptr %y_ix_dim_0_20, align 1 + %3 = getelementptr %_elem_type_of_y, ptr %y_ix_dim_0_20, i64 2 + %4 = load <2 x double>, ptr %3, align 1 + %a_ix_dim_1_ = getelementptr inbounds i8, ptr %a_rvo_based_addr_, i64 %_ix_x_len19 + %5 = load <2 x double>, ptr %a_ix_dim_1_, align 1 + %6 = getelementptr i8, ptr %a_ix_dim_1_, i64 16 + %7 = load <2 x double>, ptr %6, align 1 + %8 = tail call nsz contract <2 x double> @llvm.fma.v2f64(<2 x double> %5, <2 x double> %2, <2 x double> %2) + %9 = tail call nsz contract <2 x double> @llvm.fma.v2f64(<2 x double> %7, <2 x double> %.splat, <2 x double> %4) + store <2 x double> %8, ptr %y_ix_dim_0_20, align 1 + store <2 x double> %9, ptr %3, align 1 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 16 %_grt_tmp = icmp sgt i64 %indvars.iv.next, %1 br i1 %_grt_tmp, label %_return_bb, label %_loop_2_do_