206 changes: 91 additions & 115 deletions llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll

Large diffs are not rendered by default.

120 changes: 56 additions & 64 deletions llvm/test/Transforms/PhaseOrdering/X86/vector-reductions-expanded.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,10 @@
target triple = "x86_64--"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define i32 @add_v4i32(i32* %p) #0 {
define i32 @add_v4i32(ptr %p) #0 {
; CHECK-LABEL: @add_v4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
Expand All @@ -34,8 +33,8 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 4, !tbaa !3
%arrayidx = getelementptr inbounds i32, ptr %p, i64 %idxprom
%0 = load i32, ptr %arrayidx, align 4, !tbaa !3
%add = add nsw i32 %r.0, %0
br label %for.inc

Expand All @@ -47,11 +46,10 @@ for.end:
ret i32 %r.0
}

define signext i16 @mul_v8i16(i16* %p) #0 {
define signext i16 @mul_v8i16(ptr %p) #0 {
; CHECK-LABEL: @mul_v8i16(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[P:%.*]] to <8 x i16>*
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2, !tbaa [[TBAA4:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[P:%.*]], align 2, !tbaa [[TBAA4:![0-9]+]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = mul <8 x i16> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
Expand All @@ -75,8 +73,8 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds i16, i16* %p, i64 %idxprom
%0 = load i16, i16* %arrayidx, align 2, !tbaa !7
%arrayidx = getelementptr inbounds i16, ptr %p, i64 %idxprom
%0 = load i16, ptr %arrayidx, align 2, !tbaa !7
%conv = sext i16 %0 to i32
%conv1 = sext i16 %r.0 to i32
%mul = mul nsw i32 %conv1, %conv
Expand All @@ -91,11 +89,10 @@ for.end:
ret i16 %r.0
}

define signext i8 @or_v16i8(i8* %p) #0 {
define signext i8 @or_v16i8(ptr %p) #0 {
; CHECK-LABEL: @or_v16i8(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[P:%.*]] to <16 x i8>*
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1, !tbaa [[TBAA6:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[P:%.*]], align 1, !tbaa [[TBAA6:![0-9]+]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <16 x i8> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i8> [[BIN_RDX]], <16 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
Expand All @@ -121,8 +118,8 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds i8, i8* %p, i64 %idxprom
%0 = load i8, i8* %arrayidx, align 1, !tbaa !9
%arrayidx = getelementptr inbounds i8, ptr %p, i64 %idxprom
%0 = load i8, ptr %arrayidx, align 1, !tbaa !9
%conv = sext i8 %0 to i32
%conv1 = sext i8 %r.0 to i32
%or = or i32 %conv1, %conv
Expand All @@ -137,11 +134,10 @@ for.end:
ret i8 %r.0
}

define i32 @smin_v4i32(i32* %p) #0 {
define i32 @smin_v4i32(ptr %p) #0 {
; CHECK-LABEL: @smin_v4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
Expand All @@ -165,15 +161,15 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 4, !tbaa !3
%arrayidx = getelementptr inbounds i32, ptr %p, i64 %idxprom
%0 = load i32, ptr %arrayidx, align 4, !tbaa !3
%cmp1 = icmp slt i32 %0, %r.0
br i1 %cmp1, label %cond.true, label %cond.false

cond.true:
%idxprom2 = sext i32 %i.0 to i64
%arrayidx3 = getelementptr inbounds i32, i32* %p, i64 %idxprom2
%1 = load i32, i32* %arrayidx3, align 4, !tbaa !3
%arrayidx3 = getelementptr inbounds i32, ptr %p, i64 %idxprom2
%1 = load i32, ptr %arrayidx3, align 4, !tbaa !3
br label %cond.end

cond.false:
Expand All @@ -191,11 +187,10 @@ for.end:
ret i32 %r.0
}

define i32 @umax_v4i32(i32* %p) #0 {
define i32 @umax_v4i32(ptr %p) #0 {
; CHECK-LABEL: @umax_v4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
Expand All @@ -219,15 +214,15 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 4, !tbaa !3
%arrayidx = getelementptr inbounds i32, ptr %p, i64 %idxprom
%0 = load i32, ptr %arrayidx, align 4, !tbaa !3
%cmp1 = icmp ugt i32 %0, %r.0
br i1 %cmp1, label %cond.true, label %cond.false

cond.true:
%idxprom2 = sext i32 %i.0 to i64
%arrayidx3 = getelementptr inbounds i32, i32* %p, i64 %idxprom2
%1 = load i32, i32* %arrayidx3, align 4, !tbaa !3
%arrayidx3 = getelementptr inbounds i32, ptr %p, i64 %idxprom2
%1 = load i32, ptr %arrayidx3, align 4, !tbaa !3
br label %cond.end

cond.false:
Expand All @@ -245,11 +240,10 @@ for.end:
ret i32 %r.0
}

define float @fadd_v4i32(float* %p) #0 {
define float @fadd_v4i32(ptr %p) #0 {
; CHECK-LABEL: @fadd_v4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4, !tbaa [[TBAA7:![0-9]+]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
Expand All @@ -272,8 +266,8 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
%0 = load float, float* %arrayidx, align 4, !tbaa !10
%arrayidx = getelementptr inbounds float, ptr %p, i64 %idxprom
%0 = load float, ptr %arrayidx, align 4, !tbaa !10
%add = fadd fast float %r.0, %0
br label %for.inc

Expand All @@ -285,11 +279,10 @@ for.end:
ret float %r.0
}

define float @fmul_v4i32(float* %p) #0 {
define float @fmul_v4i32(ptr %p) #0 {
; CHECK-LABEL: @fmul_v4i32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7]]
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4, !tbaa [[TBAA7]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[BIN_RDX:%.*]] = fmul fast <4 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
Expand All @@ -313,8 +306,8 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
%0 = load float, float* %arrayidx, align 4, !tbaa !10
%arrayidx = getelementptr inbounds float, ptr %p, i64 %idxprom
%0 = load float, ptr %arrayidx, align 4, !tbaa !10
%mul = fmul fast float %r.0, %0
br label %for.inc

Expand All @@ -326,11 +319,10 @@ for.end:
ret float %r.0
}

define float @fmin_v4f32(float* %p) #0 {
define float @fmin_v4f32(ptr %p) #0 {
; CHECK-LABEL: @fmin_v4f32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7]]
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[P:%.*]], align 4, !tbaa [[TBAA7]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x float> [[TMP1]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP1]], <4 x float> [[RDX_SHUF]]
Expand All @@ -354,15 +346,15 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
%0 = load float, float* %arrayidx, align 4, !tbaa !10
%arrayidx = getelementptr inbounds float, ptr %p, i64 %idxprom
%0 = load float, ptr %arrayidx, align 4, !tbaa !10
%cmp1 = fcmp fast olt float %0, %r.0
br i1 %cmp1, label %cond.true, label %cond.false

cond.true:
%idxprom2 = sext i32 %i.0 to i64
%arrayidx3 = getelementptr inbounds float, float* %p, i64 %idxprom2
%1 = load float, float* %arrayidx3, align 4, !tbaa !10
%arrayidx3 = getelementptr inbounds float, ptr %p, i64 %idxprom2
%1 = load float, ptr %arrayidx3, align 4, !tbaa !10
br label %cond.end

cond.false:
Expand All @@ -384,19 +376,19 @@ define available_externally float @max(float %a, float %b) {
entry:
%a.addr = alloca float, align 4
%b.addr = alloca float, align 4
store float %a, float* %a.addr, align 4
store float %b, float* %b.addr, align 4
%0 = load float, float* %a.addr, align 4
%1 = load float, float* %b.addr, align 4
store float %a, ptr %a.addr, align 4
store float %b, ptr %b.addr, align 4
%0 = load float, ptr %a.addr, align 4
%1 = load float, ptr %b.addr, align 4
%cmp = fcmp nnan ninf nsz ogt float %0, %1
br i1 %cmp, label %cond.true, label %cond.false

cond.true: ; preds = %entry
%2 = load float, float* %a.addr, align 4
%2 = load float, ptr %a.addr, align 4
br label %cond.end

cond.false: ; preds = %entry
%3 = load float, float* %b.addr, align 4
%3 = load float, ptr %b.addr, align 4
br label %cond.end

cond.end: ; preds = %cond.false, %cond.true
Expand All @@ -406,10 +398,10 @@ cond.end: ; preds = %cond.false, %cond.t

; PR23116

define float @findMax(<8 x float>* byval(<8 x float>) align 16 %0) {
define float @findMax(ptr byval(<8 x float>) align 16 %0) {
; CHECK-LABEL: @findMax(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[V:%.*]] = load <8 x float>, <8 x float>* [[TMP0:%.*]], align 16, !tbaa [[TBAA0]]
; CHECK-NEXT: [[V:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 16, !tbaa [[TBAA0]]
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[V]], <8 x float> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[V]], [[RDX_SHUF]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[V]], <8 x float> [[RDX_SHUF]]
Expand All @@ -424,29 +416,29 @@ define float @findMax(<8 x float>* byval(<8 x float>) align 16 %0) {
;
entry:
%v.addr = alloca <8 x float>, align 32
%v = load <8 x float>, <8 x float>* %0, align 16, !tbaa !3
store <8 x float> %v, <8 x float>* %v.addr, align 32, !tbaa !3
%1 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
%v = load <8 x float>, ptr %0, align 16, !tbaa !3
store <8 x float> %v, ptr %v.addr, align 32, !tbaa !3
%1 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
%vecext = extractelement <8 x float> %1, i32 0
%2 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
%2 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
%vecext1 = extractelement <8 x float> %2, i32 1
%call = call nnan ninf nsz float @max(float %vecext, float %vecext1)
%3 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
%3 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
%vecext2 = extractelement <8 x float> %3, i32 2
%call3 = call nnan ninf nsz float @max(float %call, float %vecext2)
%4 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
%4 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
%vecext4 = extractelement <8 x float> %4, i32 3
%call5 = call nnan ninf nsz float @max(float %call3, float %vecext4)
%5 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
%5 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
%vecext6 = extractelement <8 x float> %5, i32 4
%call7 = call nnan ninf nsz float @max(float %call5, float %vecext6)
%6 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
%6 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
%vecext8 = extractelement <8 x float> %6, i32 5
%call9 = call nnan ninf nsz float @max(float %call7, float %vecext8)
%7 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
%7 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
%vecext10 = extractelement <8 x float> %7, i32 6
%call11 = call nnan ninf nsz float @max(float %call9, float %vecext10)
%8 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
%8 = load <8 x float>, ptr %v.addr, align 32, !tbaa !3
%vecext12 = extractelement <8 x float> %8, i32 7
%call13 = call nnan ninf nsz float @max(float %call11, float %vecext12)
ret float %call13
Expand Down
64 changes: 28 additions & 36 deletions llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,11 @@ define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x
; PR43953 - https://bugs.llvm.org/show_bug.cgi?id=43953
; We want to end up with a single reduction on the next 4 tests.

define i32 @TestVectorsEqual(i32* noalias %Vec0, i32* noalias %Vec1, i32 %Tolerance) {
define i32 @TestVectorsEqual(ptr noalias %Vec0, ptr noalias %Vec1, i32 %Tolerance) {
; CHECK-LABEL: @TestVectorsEqual(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[VEC0:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[VEC1:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[VEC0:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC1:%.*]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP4]], i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
Expand All @@ -86,11 +84,11 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %Component.0 to i64
%arrayidx = getelementptr inbounds i32, i32* %Vec0, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr %Vec0, i64 %idxprom
%0 = load i32, ptr %arrayidx, align 4
%idxprom1 = sext i32 %Component.0 to i64
%arrayidx2 = getelementptr inbounds i32, i32* %Vec1, i64 %idxprom1
%1 = load i32, i32* %arrayidx2, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %Vec1, i64 %idxprom1
%1 = load i32, ptr %arrayidx2, align 4
%sub = sub nsw i32 %0, %1
%cmp3 = icmp sge i32 %sub, 0
br i1 %cmp3, label %cond.true, label %cond.false
Expand Down Expand Up @@ -118,13 +116,11 @@ for.end:
ret i32 %cond6
}

define i32 @TestVectorsEqual_alt(i32* noalias %Vec0, i32* noalias %Vec1, i32 %Tolerance) {
define i32 @TestVectorsEqual_alt(ptr noalias %Vec0, ptr noalias %Vec1, i32 %Tolerance) {
; CHECK-LABEL: @TestVectorsEqual_alt(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[VEC0:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[VEC1:%.*]] to <4 x i32>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[VEC0:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC1:%.*]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP4]])
; CHECK-NEXT: [[CMP3_NOT:%.*]] = icmp ule i32 [[TMP5]], [[TOLERANCE:%.*]]
Expand All @@ -145,11 +141,11 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %Component.0 to i64
%arrayidx = getelementptr inbounds i32, i32* %Vec0, i64 %idxprom
%0 = load i32, i32* %arrayidx, align 4
%arrayidx = getelementptr inbounds i32, ptr %Vec0, i64 %idxprom
%0 = load i32, ptr %arrayidx, align 4
%idxprom1 = sext i32 %Component.0 to i64
%arrayidx2 = getelementptr inbounds i32, i32* %Vec1, i64 %idxprom1
%1 = load i32, i32* %arrayidx2, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %Vec1, i64 %idxprom1
%1 = load i32, ptr %arrayidx2, align 4
%sub = sub i32 %0, %1
%add = add i32 %sum.0, %sub
br label %for.inc
Expand All @@ -165,13 +161,11 @@ for.end:
ret i32 %cond
}

define i32 @TestVectorsEqualFP(float* noalias %Vec0, float* noalias %Vec1, float %Tolerance) {
define i32 @TestVectorsEqualFP(ptr noalias %Vec0, ptr noalias %Vec1, float %Tolerance) {
; CHECK-LABEL: @TestVectorsEqualFP(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[VEC0:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[VEC1:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[VEC0:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[VEC1:%.*]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <4 x float> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = tail call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP5]])
Expand All @@ -193,11 +187,11 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %Component.0 to i64
%arrayidx = getelementptr inbounds float, float* %Vec0, i64 %idxprom
%0 = load float, float* %arrayidx, align 4
%arrayidx = getelementptr inbounds float, ptr %Vec0, i64 %idxprom
%0 = load float, ptr %arrayidx, align 4
%idxprom1 = sext i32 %Component.0 to i64
%arrayidx2 = getelementptr inbounds float, float* %Vec1, i64 %idxprom1
%1 = load float, float* %arrayidx2, align 4
%arrayidx2 = getelementptr inbounds float, ptr %Vec1, i64 %idxprom1
%1 = load float, ptr %arrayidx2, align 4
%sub = fsub fast float %0, %1
%cmp3 = fcmp fast oge float %sub, 0.000000e+00
br i1 %cmp3, label %cond.true, label %cond.false
Expand Down Expand Up @@ -225,13 +219,11 @@ for.end:
ret i32 %cond5
}

define i32 @TestVectorsEqualFP_alt(float* noalias %Vec0, float* noalias %Vec1, float %Tolerance) {
define i32 @TestVectorsEqualFP_alt(ptr noalias %Vec0, ptr noalias %Vec1, float %Tolerance) {
; CHECK-LABEL: @TestVectorsEqualFP_alt(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[VEC0:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[VEC1:%.*]] to <4 x float>*
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[VEC0:%.*]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x float>, ptr [[VEC1:%.*]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fsub fast <4 x float> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = tail call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP4]])
; CHECK-NEXT: [[CMP3:%.*]] = fcmp fast ole float [[TMP5]], [[TOLERANCE:%.*]]
Expand All @@ -252,11 +244,11 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %Component.0 to i64
%arrayidx = getelementptr inbounds float, float* %Vec0, i64 %idxprom
%0 = load float, float* %arrayidx, align 4
%arrayidx = getelementptr inbounds float, ptr %Vec0, i64 %idxprom
%0 = load float, ptr %arrayidx, align 4
%idxprom1 = sext i32 %Component.0 to i64
%arrayidx2 = getelementptr inbounds float, float* %Vec1, i64 %idxprom1
%1 = load float, float* %arrayidx2, align 4
%arrayidx2 = getelementptr inbounds float, ptr %Vec1, i64 %idxprom1
%1 = load float, ptr %arrayidx2, align 4
%sub = fsub fast float %0, %1
%add = fadd fast float %sum.0, %sub
br label %for.inc
Expand Down
50 changes: 25 additions & 25 deletions llvm/test/Transforms/PhaseOrdering/assume-explosion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,70 +23,70 @@ define void @f() #0 {
; CHECK-LABEL: @f(
;
entry:
store i32 5, i32* @c, align 4, !tbaa !3
store i32 5, ptr @c, align 4, !tbaa !3
br label %for.cond

for.cond:
%0 = load i32, i32* @c, align 4, !tbaa !3
%0 = load i32, ptr @c, align 4, !tbaa !3
%cmp = icmp sle i32 %0, 63
br i1 %cmp, label %for.body, label %for.end34

for.body:
store i16 9, i16* @e, align 2, !tbaa !7
store i16 9, ptr @e, align 2, !tbaa !7
br label %for.cond1

for.cond1:
%1 = load i16, i16* @e, align 2, !tbaa !7
%1 = load i16, ptr @e, align 2, !tbaa !7
%conv = zext i16 %1 to i32
%cmp2 = icmp sle i32 %conv, 60
br i1 %cmp2, label %for.body4, label %for.end32

for.body4:
%2 = load i16, i16* @e, align 2, !tbaa !7
%2 = load i16, ptr @e, align 2, !tbaa !7
%conv5 = zext i16 %2 to i32
%3 = load i32, i32* @b, align 4, !tbaa !3
%3 = load i32, ptr @b, align 4, !tbaa !3
%xor = xor i32 %conv5, %3
%4 = load i32, i32* @d, align 4, !tbaa !3
%4 = load i32, ptr @d, align 4, !tbaa !3
%cmp6 = icmp ne i32 %xor, %4
br i1 %cmp6, label %if.then, label %if.end27

if.then:
%5 = load i32, i32* @a, align 4, !tbaa !3
%5 = load i32, ptr @a, align 4, !tbaa !3
%conv8 = sext i32 %5 to i64
%6 = inttoptr i64 %conv8 to i8*
store i8 3, i8* %6, align 1, !tbaa !9
%6 = inttoptr i64 %conv8 to ptr
store i8 3, ptr %6, align 1, !tbaa !9
br label %for.cond9

for.cond9:
%7 = load i8, i8* %6, align 1, !tbaa !9
%7 = load i8, ptr %6, align 1, !tbaa !9
%conv10 = sext i8 %7 to i32
%cmp11 = icmp sle i32 %conv10, 32
br i1 %cmp11, label %for.body13, label %for.end26

for.body13:
%8 = load i8, i8* %6, align 1, !tbaa !9
%8 = load i8, ptr %6, align 1, !tbaa !9
%tobool = icmp ne i8 %8, 0
br i1 %tobool, label %if.then14, label %if.end

if.then14:
store i8 1, i8* bitcast (i32* @a to i8*), align 1, !tbaa !9
store i8 1, ptr @a, align 1, !tbaa !9
br label %for.cond15

for.cond15:
%9 = load i8, i8* bitcast (i32* @a to i8*), align 1, !tbaa !9
%9 = load i8, ptr @a, align 1, !tbaa !9
%conv16 = sext i8 %9 to i32
%cmp17 = icmp sle i32 %conv16, 30
br i1 %cmp17, label %for.body19, label %for.end

for.body19:
%10 = load i32, i32* @c, align 4, !tbaa !3
%10 = load i32, ptr @c, align 4, !tbaa !3
%cmp20 = icmp eq i32 0, %10
%conv21 = zext i1 %cmp20 to i32
%11 = load i8, i8* bitcast (i32* @a to i8*), align 1, !tbaa !9
%11 = load i8, ptr @a, align 1, !tbaa !9
%conv22 = sext i8 %11 to i32
%and = and i32 %conv22, %conv21
%conv23 = trunc i32 %and to i8
store i8 %conv23, i8* bitcast (i32* @a to i8*), align 1, !tbaa !9
store i8 %conv23, ptr @a, align 1, !tbaa !9
br label %for.cond15, !llvm.loop !10

for.end:
Expand All @@ -96,11 +96,11 @@ if.end:
br label %for.inc

for.inc:
%12 = load i8, i8* %6, align 1, !tbaa !9
%12 = load i8, ptr %6, align 1, !tbaa !9
%conv24 = sext i8 %12 to i32
%add = add nsw i32 %conv24, 1
%conv25 = trunc i32 %add to i8
store i8 %conv25, i8* %6, align 1, !tbaa !9
store i8 %conv25, ptr %6, align 1, !tbaa !9
br label %for.cond9, !llvm.loop !12

for.end26:
Expand All @@ -110,28 +110,28 @@ if.end27:
br label %for.inc28

for.inc28:
%13 = load i16, i16* @e, align 2, !tbaa !7
%13 = load i16, ptr @e, align 2, !tbaa !7
%conv29 = zext i16 %13 to i32
%add30 = add nsw i32 %conv29, 1
%conv31 = trunc i32 %add30 to i16
store i16 %conv31, i16* @e, align 2, !tbaa !7
store i16 %conv31, ptr @e, align 2, !tbaa !7
br label %for.cond1, !llvm.loop !13

for.end32:
br label %for.inc33

for.inc33:
%14 = load i32, i32* @c, align 4, !tbaa !3
%14 = load i32, ptr @c, align 4, !tbaa !3
%inc = add nsw i32 %14, 1
store i32 %inc, i32* @c, align 4, !tbaa !3
store i32 %inc, ptr @c, align 4, !tbaa !3
br label %for.cond, !llvm.loop !14

for.end34:
ret void
}

declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1

attributes #0 = { nounwind ssp uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" }
attributes #1 = { argmemonly nofree nosync nounwind willreturn }
Expand Down
40 changes: 19 additions & 21 deletions llvm/test/Transforms/PhaseOrdering/basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,22 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.6.7"

declare i8* @malloc(i64)
declare void @free(i8*)
declare ptr @malloc(i64)
declare void @free(ptr)

; PR2338
define void @test1() nounwind ssp {
; CHECK-LABEL: @test1(
; CHECK-NEXT: ret void
;
%retval = alloca i32, align 4
%i = alloca i8*, align 8
%call = call i8* @malloc(i64 1)
store i8* %call, i8** %i, align 8
%tmp = load i8*, i8** %i, align 8
store i8 1, i8* %tmp
%tmp1 = load i8*, i8** %i, align 8
call void @free(i8* %tmp1)
%i = alloca ptr, align 8
%call = call ptr @malloc(i64 1)
store ptr %call, ptr %i, align 8
%tmp = load ptr, ptr %i, align 8
store i8 1, ptr %tmp
%tmp1 = load ptr, ptr %i, align 8
call void @free(ptr %tmp1)
ret void

}
Expand All @@ -29,27 +29,25 @@ define void @test1() nounwind ssp {
;
; It is also important that %add is expressed as a multiple of %div so scalar
; evolution can recognize it.
define i32 @test2(i32 %a, i32* %p) nounwind uwtable ssp {
define i32 @test2(i32 %a, ptr %p) nounwind uwtable ssp {
; CHECK-LABEL: @test2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[A:%.*]], 2
; CHECK-NEXT: store i32 [[DIV]], i32* [[P:%.*]], align 4
; CHECK-NEXT: store i32 [[DIV]], ptr [[P:%.*]], align 4
; CHECK-NEXT: [[ADD:%.*]] = shl nuw nsw i32 [[DIV]], 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX1]], align 4
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX1]], align 4
; CHECK-NEXT: ret i32 0
;
entry:
%div = udiv i32 %a, 4
%arrayidx = getelementptr inbounds i32, i32* %p, i64 0
store i32 %div, i32* %arrayidx, align 4
store i32 %div, ptr %p, align 4
%add = add i32 %div, %div
%arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
store i32 %add, i32* %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %p, i64 1
%0 = load i32, i32* %arrayidx2, align 4
%arrayidx3 = getelementptr inbounds i32, i32* %p, i64 0
%1 = load i32, i32* %arrayidx3, align 4
%arrayidx1 = getelementptr inbounds i32, ptr %p, i64 1
store i32 %add, ptr %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %p, i64 1
%0 = load i32, ptr %arrayidx2, align 4
%1 = load i32, ptr %p, align 4
%mul = mul i32 2, %1
%sub = sub i32 %0, %mul
ret i32 %sub
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/Transforms/PhaseOrdering/cmp-logic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -136,10 +136,8 @@ define i32 @PR56119(i32 %e.coerce) {
;
entry:
%e = alloca %struct.a, align 4
%coerce.dive = getelementptr inbounds %struct.a, ptr %e, i32 0, i32 0
store i32 %e.coerce, ptr %coerce.dive, align 4
%b = getelementptr inbounds %struct.a, ptr %e, i32 0, i32 0
%0 = load i32, ptr %b, align 4
store i32 %e.coerce, ptr %e, align 4
%0 = load i32, ptr %e, align 4
%conv = trunc i32 %0 to i8
%conv1 = trunc i64 -1 to i8
%conv2 = zext i8 %conv to i32
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,36 @@
; RUN: opt -loop-rotate -instcombine -enable-knowledge-retention -S < %s | FileCheck %s
; RUN: opt -passes='loop(loop-rotate),instcombine' -enable-knowledge-retention -S < %s | FileCheck %s

%0 = type { %0* }
%0 = type { ptr }

define %0* @f1(%0* %i0) local_unnamed_addr {
define ptr @f1(ptr %i0) local_unnamed_addr {
; CHECK-LABEL: @f1(
; CHECK-NEXT: bb:
; CHECK: br label [[BB3:%.*]]
; CHECK: bb3:
; CHECK-NEXT: [[I1:%.*]] = phi %0* [ %i0, [[BB:%.*]] ], [ [[I5:%.*]], [[BB3]] ]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(%0* [[I1]]) ]
; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[I1]], i64 0, i32 0
; CHECK-NEXT: [[I5]] = load %0*, %0** [[I4]], align 8
; CHECK-NEXT: [[I2:%.*]] = icmp eq %0* [[I5]], null
; CHECK-NEXT: [[I1:%.*]] = phi ptr [ %i0, [[BB:%.*]] ], [ [[I5:%.*]], [[BB3]] ]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[I1]]) ]
; CHECK-NEXT: [[I5]] = load ptr, ptr [[I1]], align 8
; CHECK-NEXT: [[I2:%.*]] = icmp eq ptr [[I5]], null
; CHECK-NEXT: br i1 [[I2]], label [[BB6:%.*]], label [[BB3]]
; CHECK: bb6:
; CHECK-NEXT: ret %0* undef
; CHECK-NEXT: ret ptr undef
;
bb:
br label %bb1

bb1:
%i = phi %0* [ %i0, %bb ], [ %i5, %bb3 ]
%i2 = icmp eq %0* %i, null
%i = phi ptr [ %i0, %bb ], [ %i5, %bb3 ]
%i2 = icmp eq ptr %i, null
br i1 %i2, label %bb6, label %bb3

bb3:
call void @llvm.assume(i1 true) [ "nonnull"(%0* %i) ]
%i4 = getelementptr inbounds %0, %0* %i, i64 0, i32 0
%i5 = load %0*, %0** %i4, align 8
call void @llvm.assume(i1 true) [ "nonnull"(ptr %i) ]
%i5 = load ptr, ptr %i, align 8
br label %bb1

bb6:
ret %0* undef
ret ptr undef
}

declare void @llvm.assume(i1)
24 changes: 11 additions & 13 deletions llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,34 @@
@dummy = global i32 0
; CHECK: [[DUMMY:@.*]] = local_unnamed_addr global i32 0

; @f receives a %struct.ss and an i32, both passed byval. It loads the first
; i32 field of %b, adds 1, and stores the sum to @dummy and through %X.
; The expected output has a single i32 parameter, and only the store to @dummy
; remains in the body.
define internal void @f(%struct.ss* byval(%struct.ss) align 8 %b, i32* byval(i32) align 4 %X) noinline nounwind {
define internal void @f(ptr byval(%struct.ss) align 8 %b, ptr byval(i32) align 4 %X) noinline nounwind {
; CHECK-LABEL: define {{[^@]+}}@f
; CHECK-SAME: (i32 [[B_0:%.*]]){{[^#]*}} #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TEMP:%.*]] = add i32 [[B_0]], 1
; CHECK-NEXT: store i32 [[TEMP]], i32* [[DUMMY]], align 4
; CHECK-NEXT: store i32 [[TEMP]], ptr [[DUMMY]], align 4
; CHECK-NEXT: ret void
;
entry:
%temp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
%temp1 = load i32, i32* %temp, align 4
%temp1 = load i32, ptr %b, align 4
%temp2 = add i32 %temp1, 1
store i32 %temp2, i32* @dummy
store i32 %temp2, i32* %X
store i32 %temp2, ptr @dummy
store i32 %temp2, ptr %X
ret void
}

; @test allocates a %struct.ss on the stack, stores i32 1 into field 0 and
; i64 2 into field 1, then calls @f with the struct and %X both passed byval.
; It always returns 0. The expected output is a tail call to @f with the
; constant argument i32 1, followed by the return.
define i32 @test(i32* %X) {
define i32 @test(ptr %X) {
; CHECK-LABEL: define {{[^@]+}}@test
; CHECK-SAME: (i32* {{[^%]*}} [[X:%.*]]){{[^#]*}} #[[ATTR1:[0-9]+]] {
; CHECK-SAME: (ptr {{[^%]*}} [[X:%.*]]){{[^#]*}} #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: tail call {{.*}}void @f(i32 1)
; CHECK-NEXT: ret i32 0
;
entry:
%S = alloca %struct.ss, align 8
%temp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0
store i32 1, i32* %temp1, align 8
%temp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1
store i64 2, i64* %temp4, align 4
call void @f( %struct.ss* byval(%struct.ss) align 8 %S, i32* byval(i32) align 4 %X)
store i32 1, ptr %S, align 8
%temp4 = getelementptr %struct.ss, ptr %S, i32 0, i32 1
store i64 2, ptr %temp4, align 4
call void @f( ptr byval(%struct.ss) align 8 %S, ptr byval(i32) align 4 %X)
ret i32 0
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,125 +6,118 @@
; All these tests should optimize to a single comparison
; of the original argument with null. There should be no loops.

%struct.node = type { %struct.node*, i32 }
%struct.node = type { ptr, i32 }

; Returns true when @count_nodes_variant1(%p) is greater than 0 (signed
; compare on the i32 result). %p is first spilled to a stack slot and reloaded
; before the call. The expected output is a single "icmp ne" of %p with null.
define dso_local zeroext i1 @is_not_empty_variant1(%struct.node* %p) {
define dso_local zeroext i1 @is_not_empty_variant1(ptr %p) {
; ALL-LABEL: @is_not_empty_variant1(
; ALL-NEXT: entry:
; ALL-NEXT: [[TOBOOL_NOT3_I:%.*]] = icmp ne %struct.node* [[P:%.*]], null
; ALL-NEXT: [[TOBOOL_NOT3_I:%.*]] = icmp ne ptr [[P:%.*]], null
; ALL-NEXT: ret i1 [[TOBOOL_NOT3_I]]
;
entry:
%p.addr = alloca %struct.node*, align 8
store %struct.node* %p, %struct.node** %p.addr, align 8
%0 = load %struct.node*, %struct.node** %p.addr, align 8
%call = call i32 @count_nodes_variant1(%struct.node* %0)
%p.addr = alloca ptr, align 8
store ptr %p, ptr %p.addr, align 8
%0 = load ptr, ptr %p.addr, align 8
%call = call i32 @count_nodes_variant1(ptr %0)
%cmp = icmp sgt i32 %call, 0
ret i1 %cmp
}

; Counts the nodes of a linked list using stack slots for all state.
; %p.addr holds the current node pointer and %size holds an i32 counter that
; starts at 0. While the current pointer is non-null, the loop replaces it with
; the pointer loaded from the node's first field and increments the counter
; with "add nsw". The final counter value is returned.
define internal i32 @count_nodes_variant1(%struct.node* %p) {
define internal i32 @count_nodes_variant1(ptr %p) {
entry:
%p.addr = alloca %struct.node*, align 8
%p.addr = alloca ptr, align 8
%size = alloca i32, align 4
store %struct.node* %p, %struct.node** %p.addr, align 8
%0 = bitcast i32* %size to i8*
store i32 0, i32* %size, align 4
store ptr %p, ptr %p.addr, align 8
store i32 0, ptr %size, align 4
br label %while.cond

; Loop test - continue while the current node pointer is non-null.
while.cond:
%1 = load %struct.node*, %struct.node** %p.addr, align 8
%tobool = icmp ne %struct.node* %1, null
%0 = load ptr, ptr %p.addr, align 8
%tobool = icmp ne ptr %0, null
br i1 %tobool, label %while.body, label %while.end

; Loop body - advance to the next node, then bump the counter.
while.body:
%2 = load %struct.node*, %struct.node** %p.addr, align 8
%next = getelementptr inbounds %struct.node, %struct.node* %2, i32 0, i32 0
%3 = load %struct.node*, %struct.node** %next, align 8
store %struct.node* %3, %struct.node** %p.addr, align 8
%4 = load i32, i32* %size, align 4
%inc = add nsw i32 %4, 1
store i32 %inc, i32* %size, align 4
%1 = load ptr, ptr %p.addr, align 8
%2 = load ptr, ptr %1, align 8
store ptr %2, ptr %p.addr, align 8
%3 = load i32, ptr %size, align 4
%inc = add nsw i32 %3, 1
store i32 %inc, ptr %size, align 4
br label %while.cond, !llvm.loop !0

while.end:
%5 = load i32, i32* %size, align 4
%6 = bitcast i32* %size to i8*
ret i32 %5
%4 = load i32, ptr %size, align 4
ret i32 %4
}

; Returns true when @count_nodes_variant2(%p) is greater than 0 (unsigned
; compare on the i64 result). %p is first spilled to a stack slot and reloaded
; before the call. The expected output is a single "icmp ne" of %p with null.
define dso_local zeroext i1 @is_not_empty_variant2(%struct.node* %p) {
define dso_local zeroext i1 @is_not_empty_variant2(ptr %p) {
; ALL-LABEL: @is_not_empty_variant2(
; ALL-NEXT: entry:
; ALL-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp ne %struct.node* [[P:%.*]], null
; ALL-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp ne ptr [[P:%.*]], null
; ALL-NEXT: ret i1 [[TOBOOL_NOT4_I]]
;
entry:
%p.addr = alloca %struct.node*, align 8
store %struct.node* %p, %struct.node** %p.addr, align 8
%0 = load %struct.node*, %struct.node** %p.addr, align 8
%call = call i64 @count_nodes_variant2(%struct.node* %0)
%p.addr = alloca ptr, align 8
store ptr %p, ptr %p.addr, align 8
%0 = load ptr, ptr %p.addr, align 8
%call = call i64 @count_nodes_variant2(ptr %0)
%cmp = icmp ugt i64 %call, 0
ret i1 %cmp
}

; Counts the nodes of a linked list with an i64 counter kept in %size.
; While the pointer in %p.addr is non-null, the loop replaces it with the
; pointer loaded from the node's first field and increments the counter with a
; plain "add" (no nsw/nuw flags). After each increment it passes
; "counter != 0" to @_ZL6assumeb. The final counter value is returned.
define internal i64 @count_nodes_variant2(%struct.node* %p) {
define internal i64 @count_nodes_variant2(ptr %p) {
entry:
%p.addr = alloca %struct.node*, align 8
%p.addr = alloca ptr, align 8
%size = alloca i64, align 8
store %struct.node* %p, %struct.node** %p.addr, align 8
%0 = bitcast i64* %size to i8*
store i64 0, i64* %size, align 8
store ptr %p, ptr %p.addr, align 8
store i64 0, ptr %size, align 8
br label %while.cond

; Loop test - continue while the current node pointer is non-null.
while.cond:
%1 = load %struct.node*, %struct.node** %p.addr, align 8
%tobool = icmp ne %struct.node* %1, null
%0 = load ptr, ptr %p.addr, align 8
%tobool = icmp ne ptr %0, null
br i1 %tobool, label %while.body, label %while.end

; Loop body - advance, increment, then assume the incremented counter is non-zero.
while.body:
%2 = load %struct.node*, %struct.node** %p.addr, align 8
%next = getelementptr inbounds %struct.node, %struct.node* %2, i32 0, i32 0
%3 = load %struct.node*, %struct.node** %next, align 8
store %struct.node* %3, %struct.node** %p.addr, align 8
%4 = load i64, i64* %size, align 8
%inc = add i64 %4, 1
store i64 %inc, i64* %size, align 8
%5 = load i64, i64* %size, align 8
%cmp = icmp ne i64 %5, 0
%1 = load ptr, ptr %p.addr, align 8
%2 = load ptr, ptr %1, align 8
store ptr %2, ptr %p.addr, align 8
%3 = load i64, ptr %size, align 8
%inc = add i64 %3, 1
store i64 %inc, ptr %size, align 8
%4 = load i64, ptr %size, align 8
%cmp = icmp ne i64 %4, 0
call void @_ZL6assumeb(i1 zeroext %cmp)
br label %while.cond, !llvm.loop !2

while.end:
%6 = load i64, i64* %size, align 8
%7 = bitcast i64* %size to i8*
ret i64 %6
%5 = load i64, ptr %size, align 8
ret i64 %5
}

define dso_local zeroext i1 @is_not_empty_variant3(%struct.node* %p) {
define dso_local zeroext i1 @is_not_empty_variant3(ptr %p) {
; O3-LABEL: @is_not_empty_variant3(
; O3-NEXT: entry:
; O3-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp ne %struct.node* [[P:%.*]], null
; O3-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp ne ptr [[P:%.*]], null
; O3-NEXT: ret i1 [[TOBOOL_NOT4_I]]
;
; O2-LABEL: @is_not_empty_variant3(
; O2-NEXT: entry:
; O2-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp ne %struct.node* [[P:%.*]], null
; O2-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp ne ptr [[P:%.*]], null
; O2-NEXT: ret i1 [[TOBOOL_NOT4_I]]
;
; O1-LABEL: @is_not_empty_variant3(
; O1-NEXT: entry:
; O1-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp eq %struct.node* [[P:%.*]], null
; O1-NEXT: [[TOBOOL_NOT4_I:%.*]] = icmp eq ptr [[P:%.*]], null
; O1-NEXT: br i1 [[TOBOOL_NOT4_I]], label [[COUNT_NODES_VARIANT3_EXIT:%.*]], label [[WHILE_BODY_I:%.*]]
; O1: while.body.i:
; O1-NEXT: [[SIZE_06_I:%.*]] = phi i64 [ [[INC_I:%.*]], [[WHILE_BODY_I]] ], [ 0, [[ENTRY:%.*]] ]
; O1-NEXT: [[P_ADDR_05_I:%.*]] = phi %struct.node* [ [[TMP0:%.*]], [[WHILE_BODY_I]] ], [ [[P]], [[ENTRY]] ]
; O1-NEXT: [[P_ADDR_05_I:%.*]] = phi ptr [ [[TMP0:%.*]], [[WHILE_BODY_I]] ], [ [[P]], [[ENTRY]] ]
; O1-NEXT: [[CMP_I:%.*]] = icmp ne i64 [[SIZE_06_I]], -1
; O1-NEXT: call void @llvm.assume(i1 [[CMP_I]])
; O1-NEXT: [[NEXT_I:%.*]] = getelementptr inbounds [[STRUCT_NODE:%.*]], %struct.node* [[P_ADDR_05_I]], i64 0, i32 0
; O1-NEXT: [[TMP0]] = load %struct.node*, %struct.node** [[NEXT_I]], align 8
; O1-NEXT: [[TMP0]] = load ptr, ptr [[P_ADDR_05_I]], align 8
; O1-NEXT: [[INC_I]] = add i64 [[SIZE_06_I]], 1
; O1-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq %struct.node* [[TMP0]], null
; O1-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq ptr [[TMP0]], null
; O1-NEXT: br i1 [[TOBOOL_NOT_I]], label [[COUNT_NODES_VARIANT3_EXIT_LOOPEXIT:%.*]], label [[WHILE_BODY_I]], !llvm.loop [[LOOP0:![0-9]+]]
; O1: count_nodes_variant3.exit.loopexit:
; O1-NEXT: [[PHI_CMP:%.*]] = icmp ne i64 [[INC_I]], 0
Expand All @@ -134,53 +127,50 @@ define dso_local zeroext i1 @is_not_empty_variant3(%struct.node* %p) {
; O1-NEXT: ret i1 [[SIZE_0_LCSSA_I]]
;
entry:
%p.addr = alloca %struct.node*, align 8
store %struct.node* %p, %struct.node** %p.addr, align 8
%0 = load %struct.node*, %struct.node** %p.addr, align 8
%call = call i64 @count_nodes_variant3(%struct.node* %0)
%p.addr = alloca ptr, align 8
store ptr %p, ptr %p.addr, align 8
%0 = load ptr, ptr %p.addr, align 8
%call = call i64 @count_nodes_variant3(ptr %0)
%cmp = icmp ugt i64 %call, 0
ret i1 %cmp
}

; Counts the nodes of a linked list with an i64 counter kept in %size.
; Unlike variant2, the assumption is made before the increment: each iteration
; first passes "counter != -1" to @_ZL6assumeb, then replaces the pointer in
; %p.addr with the pointer loaded from the node's first field and increments
; the counter with a plain "add". The final counter value is returned.
define internal i64 @count_nodes_variant3(%struct.node* %p) {
define internal i64 @count_nodes_variant3(ptr %p) {
entry:
%p.addr = alloca %struct.node*, align 8
%p.addr = alloca ptr, align 8
%size = alloca i64, align 8
store %struct.node* %p, %struct.node** %p.addr, align 8
%0 = bitcast i64* %size to i8*
store i64 0, i64* %size, align 8
store ptr %p, ptr %p.addr, align 8
store i64 0, ptr %size, align 8
br label %while.cond

; Loop test - continue while the current node pointer is non-null.
while.cond:
%1 = load %struct.node*, %struct.node** %p.addr, align 8
%tobool = icmp ne %struct.node* %1, null
%0 = load ptr, ptr %p.addr, align 8
%tobool = icmp ne ptr %0, null
br i1 %tobool, label %while.body, label %while.end

; Loop body - assume the counter is not -1, then advance and increment.
while.body:
%2 = load i64, i64* %size, align 8
%cmp = icmp ne i64 %2, -1
%1 = load i64, ptr %size, align 8
%cmp = icmp ne i64 %1, -1
call void @_ZL6assumeb(i1 zeroext %cmp)
%3 = load %struct.node*, %struct.node** %p.addr, align 8
%next = getelementptr inbounds %struct.node, %struct.node* %3, i32 0, i32 0
%4 = load %struct.node*, %struct.node** %next, align 8
store %struct.node* %4, %struct.node** %p.addr, align 8
%5 = load i64, i64* %size, align 8
%inc = add i64 %5, 1
store i64 %inc, i64* %size, align 8
%2 = load ptr, ptr %p.addr, align 8
%3 = load ptr, ptr %2, align 8
store ptr %3, ptr %p.addr, align 8
%4 = load i64, ptr %size, align 8
%inc = add i64 %4, 1
store i64 %inc, ptr %size, align 8
br label %while.cond, !llvm.loop !3

while.end:
%6 = load i64, i64* %size, align 8
%7 = bitcast i64* %size to i8*
ret i64 %6
%5 = load i64, ptr %size, align 8
ret i64 %5
}

define internal void @_ZL6assumeb(i1 zeroext %expression) {
entry:
%expression.addr = alloca i8, align 1
%frombool = zext i1 %expression to i8
store i8 %frombool, i8* %expression.addr, align 1
%0 = load i8, i8* %expression.addr, align 1
store i8 %frombool, ptr %expression.addr, align 1
%0 = load i8, ptr %expression.addr, align 1
%tobool = trunc i8 %0 to i1
call void @llvm.assume(i1 %tobool)
ret void
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/Transforms/PhaseOrdering/expect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
; backend can't undo. Expect lowering becomes metadata,
; and passes like SimplifyCFG should respect that.

define void @PR49336(i32 %delta, i32 %tag_type, i8* %ip) {
define void @PR49336(i32 %delta, i32 %tag_type, ptr %ip) {
; CHECK-LABEL: @PR49336(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[DELTA:%.*]], 0
Expand All @@ -15,33 +15,33 @@ define void @PR49336(i32 %delta, i32 %tag_type, i8* %ip) {
; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[TAG_TYPE:%.*]], 0
; CHECK-NEXT: br i1 [[CMP1_NOT]], label [[IF_END3]], label [[IF_THEN2:%.*]]
; CHECK: if.then2:
; CHECK-NEXT: store i8 42, i8* [[IP:%.*]], align 1
; CHECK-NEXT: store i8 42, ptr [[IP:%.*]], align 1
; CHECK-NEXT: br label [[IF_END3]]
; CHECK: if.end3:
; CHECK-NEXT: ret void
;
entry:
%delta.addr = alloca i32, align 4
%tag_type.addr = alloca i32, align 4
%ip.addr = alloca i8*, align 8
store i32 %delta, i32* %delta.addr, align 4
store i32 %tag_type, i32* %tag_type.addr, align 4
store i8* %ip, i8** %ip.addr, align 8
%0 = load i32, i32* %delta.addr, align 4
%ip.addr = alloca ptr, align 8
store i32 %delta, ptr %delta.addr, align 4
store i32 %tag_type, ptr %tag_type.addr, align 4
store ptr %ip, ptr %ip.addr, align 8
%0 = load i32, ptr %delta.addr, align 4
%cmp = icmp slt i32 %0, 0
%conv = zext i1 %cmp to i64
%expval = call i64 @llvm.expect.i64(i64 %conv, i64 0)
%tobool = icmp ne i64 %expval, 0
br i1 %tobool, label %if.then, label %if.end3

if.then:
%1 = load i32, i32* %tag_type.addr, align 4
%1 = load i32, ptr %tag_type.addr, align 4
%cmp1 = icmp ne i32 %1, 0
br i1 %cmp1, label %if.then2, label %if.end

if.then2:
%2 = load i8*, i8** %ip.addr, align 8
store i8 42, i8* %2, align 1
%2 = load ptr, ptr %ip.addr, align 8
store i8 42, ptr %2, align 1
br label %if.end

if.end:
Expand Down
60 changes: 30 additions & 30 deletions llvm/test/Transforms/PhaseOrdering/fast-reassociate-gvn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,75 +29,75 @@

; @test3 loads @fa, @fb and @fc and forms the same three-term sum in two
; association orders using "fadd fast" - (a+b)+c is stored to @fe and (a+c)+b
; is stored to @ff. The expected output computes one sum and stores that same
; value to both globals.
define void @test3() {
; CHECK-LABEL: @test3(
; CHECK-NEXT: [[A:%.*]] = load float, float* @fa, align 4
; CHECK-NEXT: [[B:%.*]] = load float, float* @fb, align 4
; CHECK-NEXT: [[C:%.*]] = load float, float* @fc, align 4
; CHECK-NEXT: [[A:%.*]] = load float, ptr @fa, align 4
; CHECK-NEXT: [[B:%.*]] = load float, ptr @fb, align 4
; CHECK-NEXT: [[C:%.*]] = load float, ptr @fc, align 4
; CHECK-NEXT: [[T1:%.*]] = fadd fast float [[B]], [[A]]
; CHECK-NEXT: [[T2:%.*]] = fadd fast float [[T1]], [[C]]
; CHECK-NEXT: store float [[T2]], float* @fe, align 4
; CHECK-NEXT: store float [[T2]], float* @ff, align 4
; CHECK-NEXT: store float [[T2]], ptr @fe, align 4
; CHECK-NEXT: store float [[T2]], ptr @ff, align 4
; CHECK-NEXT: ret void
;
%A = load float, float* @fa
%B = load float, float* @fb
%C = load float, float* @fc
%A = load float, ptr @fa
%B = load float, ptr @fb
%C = load float, ptr @fc
%t1 = fadd fast float %A, %B
%t2 = fadd fast float %t1, %C
%t3 = fadd fast float %A, %C
%t4 = fadd fast float %t3, %B
; e = (a+b)+c;
store float %t2, float* @fe
store float %t2, ptr @fe
; f = (a+c)+b
store float %t4, float* @ff
store float %t4, ptr @ff
ret void
}

; @test4 is the same scenario as @test3 with different operand orders in the
; input - c+(a+b) is stored to @fe and (c+a)+b is stored to @ff, all with
; "fadd fast". The expected output again computes one sum and stores that same
; value to both globals.
define void @test4() {
; CHECK-LABEL: @test4(
; CHECK-NEXT: [[A:%.*]] = load float, float* @fa, align 4
; CHECK-NEXT: [[B:%.*]] = load float, float* @fb, align 4
; CHECK-NEXT: [[C:%.*]] = load float, float* @fc, align 4
; CHECK-NEXT: [[A:%.*]] = load float, ptr @fa, align 4
; CHECK-NEXT: [[B:%.*]] = load float, ptr @fb, align 4
; CHECK-NEXT: [[C:%.*]] = load float, ptr @fc, align 4
; CHECK-NEXT: [[T1:%.*]] = fadd fast float [[B]], [[A]]
; CHECK-NEXT: [[T2:%.*]] = fadd fast float [[T1]], [[C]]
; CHECK-NEXT: store float [[T2]], float* @fe, align 4
; CHECK-NEXT: store float [[T2]], float* @ff, align 4
; CHECK-NEXT: store float [[T2]], ptr @fe, align 4
; CHECK-NEXT: store float [[T2]], ptr @ff, align 4
; CHECK-NEXT: ret void
;
%A = load float, float* @fa
%B = load float, float* @fb
%C = load float, float* @fc
%A = load float, ptr @fa
%B = load float, ptr @fb
%C = load float, ptr @fc
%t1 = fadd fast float %A, %B
%t2 = fadd fast float %C, %t1
%t3 = fadd fast float %C, %A
%t4 = fadd fast float %t3, %B
; e = c+(a+b)
store float %t2, float* @fe
store float %t2, ptr @fe
; f = (c+a)+b
store float %t4, float* @ff
store float %t4, ptr @ff
ret void
}

; @test5 is a further operand-order variation of @test3 - c+(b+a) is stored to
; @fe and (c+a)+b is stored to @ff, all with "fadd fast". The expected output
; again computes one sum and stores that same value to both globals.
define void @test5() {
; CHECK-LABEL: @test5(
; CHECK-NEXT: [[A:%.*]] = load float, float* @fa, align 4
; CHECK-NEXT: [[B:%.*]] = load float, float* @fb, align 4
; CHECK-NEXT: [[C:%.*]] = load float, float* @fc, align 4
; CHECK-NEXT: [[A:%.*]] = load float, ptr @fa, align 4
; CHECK-NEXT: [[B:%.*]] = load float, ptr @fb, align 4
; CHECK-NEXT: [[C:%.*]] = load float, ptr @fc, align 4
; CHECK-NEXT: [[T1:%.*]] = fadd fast float [[B]], [[A]]
; CHECK-NEXT: [[T2:%.*]] = fadd fast float [[T1]], [[C]]
; CHECK-NEXT: store float [[T2]], float* @fe, align 4
; CHECK-NEXT: store float [[T2]], float* @ff, align 4
; CHECK-NEXT: store float [[T2]], ptr @fe, align 4
; CHECK-NEXT: store float [[T2]], ptr @ff, align 4
; CHECK-NEXT: ret void
;
%A = load float, float* @fa
%B = load float, float* @fb
%C = load float, float* @fc
%A = load float, ptr @fa
%B = load float, ptr @fb
%C = load float, ptr @fc
%t1 = fadd fast float %B, %A
%t2 = fadd fast float %C, %t1
%t3 = fadd fast float %C, %A
%t4 = fadd fast float %t3, %B
; e = c+(b+a)
store float %t2, float* @fe
store float %t2, ptr @fe
; f = (c+a)+b
store float %t4, float* @ff
store float %t4, ptr @ff
ret void
}
99 changes: 48 additions & 51 deletions llvm/test/Transforms/PhaseOrdering/gdce.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,97 +10,94 @@
; CHECK-NOT: _ZN4BaseC2Ev
; CHECK-NOT: _ZN4BaseD0Ev

%class.Base = type { i32 (...)** }
%class.Base = type { ptr }

@_ZTV4Base = linkonce_odr unnamed_addr constant [4 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI4Base to i8*), i8* bitcast (void (%class.Base*)* @_ZN4BaseD1Ev to i8*), i8* bitcast (void (%class.Base*)* @_ZN4BaseD0Ev to i8*)]
@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
@_ZTV4Base = linkonce_odr unnamed_addr constant [4 x ptr] [ptr null, ptr @_ZTI4Base, ptr @_ZN4BaseD1Ev, ptr @_ZN4BaseD0Ev]
@_ZTVN10__cxxabiv117__class_type_infoE = external global ptr
@_ZTS4Base = linkonce_odr constant [6 x i8] c"4Base\00"
@_ZTI4Base = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @_ZTS4Base, i32 0, i32 0) }
@_ZTI4Base = linkonce_odr unnamed_addr constant { ptr, ptr } { ptr getelementptr inbounds (ptr, ptr @_ZTVN10__cxxabiv117__class_type_infoE, i64 2), ptr @_ZTS4Base }

; @main allocates a %class.Base object %b on the stack, calls @_ZN4BaseC1Ev
; and then @_ZN4BaseD1Ev on it, and returns the value reloaded from %retval,
; which has had 0 stored to it. %cleanup.dest.slot is written but never read
; inside this function.
define i32 @main() uwtable ssp {
entry:
%retval = alloca i32, align 4
%b = alloca %class.Base, align 8
%cleanup.dest.slot = alloca i32
store i32 0, i32* %retval
call void @_ZN4BaseC1Ev(%class.Base* %b)
store i32 0, i32* %retval
store i32 1, i32* %cleanup.dest.slot
call void @_ZN4BaseD1Ev(%class.Base* %b)
%0 = load i32, i32* %retval
store i32 0, ptr %retval
call void @_ZN4BaseC1Ev(ptr %b)
store i32 0, ptr %retval
store i32 1, ptr %cleanup.dest.slot
call void @_ZN4BaseD1Ev(ptr %b)
%0 = load i32, ptr %retval
ret i32 %0
}

; Thin forwarding wrapper - spills %this to a stack slot, reloads it, and
; passes it on to @_ZN4BaseC2Ev.
; NOTE(review): the mangled name looks like a C++ constructor of Base - confirm
; against the original source if that matters.
define linkonce_odr void @_ZN4BaseC1Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 {
define linkonce_odr void @_ZN4BaseC1Ev(ptr %this) unnamed_addr uwtable ssp align 2 {
entry:
%this.addr = alloca %class.Base*, align 8
store %class.Base* %this, %class.Base** %this.addr, align 8
%this1 = load %class.Base*, %class.Base** %this.addr
call void @_ZN4BaseC2Ev(%class.Base* %this1)
%this.addr = alloca ptr, align 8
store ptr %this, ptr %this.addr, align 8
%this1 = load ptr, ptr %this.addr
call void @_ZN4BaseC2Ev(ptr %this1)
ret void
}

; Thin forwarding wrapper - spills %this to a stack slot, reloads it, and
; passes it on to @_ZN4BaseD2Ev. It is called from @main and invoked from
; @_ZN4BaseD0Ev, and its address is stored in the @_ZTV4Base table.
define linkonce_odr void @_ZN4BaseD1Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 {
define linkonce_odr void @_ZN4BaseD1Ev(ptr %this) unnamed_addr uwtable ssp align 2 {
entry:
%this.addr = alloca %class.Base*, align 8
store %class.Base* %this, %class.Base** %this.addr, align 8
%this1 = load %class.Base*, %class.Base** %this.addr
call void @_ZN4BaseD2Ev(%class.Base* %this1)
%this.addr = alloca ptr, align 8
store ptr %this, ptr %this.addr, align 8
%this1 = load ptr, ptr %this.addr
call void @_ZN4BaseD2Ev(ptr %this1)
ret void
}

; Spills %this to a stack slot and reloads it; the reloaded value is never
; used, so the body has no effect outside its own stack frame.
define linkonce_odr void @_ZN4BaseD2Ev(%class.Base* %this) unnamed_addr nounwind uwtable ssp align 2 {
define linkonce_odr void @_ZN4BaseD2Ev(ptr %this) unnamed_addr nounwind uwtable ssp align 2 {
entry:
%this.addr = alloca %class.Base*, align 8
store %class.Base* %this, %class.Base** %this.addr, align 8
%this1 = load %class.Base*, %class.Base** %this.addr
%this.addr = alloca ptr, align 8
store ptr %this, ptr %this.addr, align 8
%this1 = load ptr, ptr %this.addr
ret void
}

; Stores the address of element 2 of the @_ZTV4Base table into the start of
; the object that %this points to. %this is spilled to a stack slot and
; reloaded first.
; The check-not directives earlier in this test expect the symbol
; _ZN4BaseC2Ev to be absent from the output.
define linkonce_odr void @_ZN4BaseC2Ev(%class.Base* %this) unnamed_addr nounwind uwtable ssp align 2 {
define linkonce_odr void @_ZN4BaseC2Ev(ptr %this) unnamed_addr nounwind uwtable ssp align 2 {
entry:
%this.addr = alloca %class.Base*, align 8
store %class.Base* %this, %class.Base** %this.addr, align 8
%this1 = load %class.Base*, %class.Base** %this.addr
%0 = bitcast %class.Base* %this1 to i8***
store i8** getelementptr inbounds ([4 x i8*], [4 x i8*]* @_ZTV4Base, i64 0, i64 2), i8*** %0
%this.addr = alloca ptr, align 8
store ptr %this, ptr %this.addr, align 8
%this1 = load ptr, ptr %this.addr
store ptr getelementptr inbounds ([4 x ptr], ptr @_ZTV4Base, i64 0, i64 2), ptr %this1
ret void
}

; Invokes @_ZN4BaseD1Ev on %this and then passes the same pointer to @_ZdlPv
; on both exits. On the normal path it returns after the @_ZdlPv call. On the
; unwind path the landing pad (a cleanup) saves the exception pointer and the
; selector to stack slots, calls @_ZdlPv, and then resumes unwinding with the
; { pointer, selector } pair rebuilt from those slots.
; The check-not directives earlier in this test expect the symbol
; _ZN4BaseD0Ev to be absent from the output.
define linkonce_odr void @_ZN4BaseD0Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
define linkonce_odr void @_ZN4BaseD0Ev(ptr %this) unnamed_addr uwtable ssp align 2 personality ptr @__gxx_personality_v0 {
entry:
%this.addr = alloca %class.Base*, align 8
%exn.slot = alloca i8*
%this.addr = alloca ptr, align 8
%exn.slot = alloca ptr
%ehselector.slot = alloca i32
store %class.Base* %this, %class.Base** %this.addr, align 8
%this1 = load %class.Base*, %class.Base** %this.addr
invoke void @_ZN4BaseD1Ev(%class.Base* %this1)
store ptr %this, ptr %this.addr, align 8
%this1 = load ptr, ptr %this.addr
invoke void @_ZN4BaseD1Ev(ptr %this1)
to label %invoke.cont unwind label %lpad

invoke.cont: ; preds = %entry
%0 = bitcast %class.Base* %this1 to i8*
call void @_ZdlPv(i8* %0) nounwind
call void @_ZdlPv(ptr %this1) nounwind
ret void

lpad: ; preds = %entry
%1 = landingpad { i8*, i32 }
%0 = landingpad { ptr, i32 }
cleanup
%2 = extractvalue { i8*, i32 } %1, 0
store i8* %2, i8** %exn.slot
%3 = extractvalue { i8*, i32 } %1, 1
store i32 %3, i32* %ehselector.slot
%4 = bitcast %class.Base* %this1 to i8*
call void @_ZdlPv(i8* %4) nounwind
%1 = extractvalue { ptr, i32 } %0, 0
store ptr %1, ptr %exn.slot
%2 = extractvalue { ptr, i32 } %0, 1
store i32 %2, ptr %ehselector.slot
call void @_ZdlPv(ptr %this1) nounwind
br label %eh.resume

eh.resume: ; preds = %lpad
%exn = load i8*, i8** %exn.slot
%sel = load i32, i32* %ehselector.slot
%lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
%lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
resume { i8*, i32 } %lpad.val2
%exn = load ptr, ptr %exn.slot
%sel = load i32, ptr %ehselector.slot
%lpad.val = insertvalue { ptr, i32 } undef, ptr %exn, 0
%lpad.val2 = insertvalue { ptr, i32 } %lpad.val, i32 %sel, 1
resume { ptr, i32 } %lpad.val2
}

declare i32 @__gxx_personality_v0(...)

declare void @_ZdlPv(i8*) nounwind
declare void @_ZdlPv(ptr) nounwind
22 changes: 11 additions & 11 deletions llvm/test/Transforms/PhaseOrdering/globalaa-retained.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

@v = internal unnamed_addr global i32 0, align 4
@p = common global i32* null, align 8
@p = common global ptr null, align 8


; This test checks that a number of loads and stores are eliminated,
Expand Down Expand Up @@ -39,25 +39,25 @@ entry:
; Function Attrs: norecurse nounwind
; @f increments the internal global @v, stores %n through the pointer loaded
; from the global @p, and then increments @v a second time. The second
; increment reloads @v after the store through the @p pointer, so the input
; contains two loads and two stores of @v.
define void @f(i32 %n) {
entry:
%0 = load i32, i32* @v, align 4
%0 = load i32, ptr @v, align 4
%inc = add nsw i32 %0, 1
store i32 %inc, i32* @v, align 4
%1 = load i32*, i32** @p, align 8
store i32 %n, i32* %1, align 4
%2 = load i32, i32* @v, align 4
store i32 %inc, ptr @v, align 4
%1 = load ptr, ptr @p, align 8
store i32 %n, ptr %1, align 4
%2 = load i32, ptr @v, align 4
%inc1 = add nsw i32 %2, 1
store i32 %inc1, i32* @v, align 4
store i32 %inc1, ptr @v, align 4
ret void
}

; check variable v is loaded/stored only once after optimization,
; which should prove that globalsAA survives until the optimization
; that can use it to optimize away the duplicate load/stores on
; variable v.
; CHECK: load i32, i32* @v, align 4
; CHECK: store i32 {{.*}}, i32* @v, align 4
; CHECK-NOT: load i32, i32* @v, align 4
; CHECK-NOT: store i32 {{.*}}, i32* @v, align 4
; CHECK: load i32, ptr @v, align 4
; CHECK: store i32 {{.*}}, ptr @v, align 4
; CHECK-NOT: load i32, ptr @v, align 4
; CHECK-NOT: store i32 {{.*}}, ptr @v, align 4

; Same as @bar above, in case the functions are processed in reverse order.
define void @bar2() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,74 +9,74 @@ target datalayout = "e-p:64:64-p5:32:32-A5"
; heuristics, which are affected by the additional instructions of the
; alignment assumption.

; @callee1 performs a volatile store of i64 0 through %ptr and returns %c
; unchanged. The %ptr parameter is declared "align 8", whereas @caller1's own
; %ptr parameter is declared "align 1".
define internal i1 @callee1(i1 %c, i64* align 8 %ptr) {
store volatile i64 0, i64* %ptr
define internal i1 @callee1(i1 %c, ptr align 8 %ptr) {
store volatile i64 0, ptr %ptr
ret i1 %c
}

; @caller1 branches on %c. The false1 path does a volatile store of 1 and
; joins true1. Block true1 calls @callee1, performs five volatile stores of -1,
; and branches on the call result to true2 (store 2) or false2 (store 3).
; Both expected outputs contain the callee's volatile store of 0 directly in
; @caller1. Under the ASSUMPTIONS-OFF prefix the stores that follow it in
; common.ret use align 4. Under the ASSUMPTIONS-ON prefix an llvm.assume with
; an "align" bundle (pointer, 8) precedes that store and the stores that
; follow it in common.ret use align 8. In both outputs the store in false2
; uses align 4.
define void @caller1(i1 %c, i64* align 1 %ptr) {
define void @caller1(i1 %c, ptr align 1 %ptr) {
; ASSUMPTIONS-OFF-LABEL: @caller1(
; ASSUMPTIONS-OFF-NEXT: br i1 [[C:%.*]], label [[COMMON_RET:%.*]], label [[FALSE2:%.*]]
; ASSUMPTIONS-OFF: common.ret:
; ASSUMPTIONS-OFF-NEXT: [[DOTSINK:%.*]] = phi i64 [ 3, [[FALSE2]] ], [ 2, [[TMP0:%.*]] ]
; ASSUMPTIONS-OFF-NEXT: store volatile i64 0, i64* [[PTR:%.*]], align 8
; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, i64* [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: store volatile i64 [[DOTSINK]], i64* [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: store volatile i64 0, ptr [[PTR:%.*]], align 8
; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, ptr [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, ptr [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, ptr [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, ptr [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: store volatile i64 -1, ptr [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: store volatile i64 [[DOTSINK]], ptr [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: ret void
; ASSUMPTIONS-OFF: false2:
; ASSUMPTIONS-OFF-NEXT: store volatile i64 1, i64* [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: store volatile i64 1, ptr [[PTR]], align 4
; ASSUMPTIONS-OFF-NEXT: br label [[COMMON_RET]]
;
; ASSUMPTIONS-ON-LABEL: @caller1(
; ASSUMPTIONS-ON-NEXT: br i1 [[C:%.*]], label [[COMMON_RET:%.*]], label [[FALSE2:%.*]]
; ASSUMPTIONS-ON: common.ret:
; ASSUMPTIONS-ON-NEXT: [[DOTSINK:%.*]] = phi i64 [ 3, [[FALSE2]] ], [ 2, [[TMP0:%.*]] ]
; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 true) [ "align"(i64* [[PTR:%.*]], i64 8) ]
; ASSUMPTIONS-ON-NEXT: store volatile i64 0, i64* [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: store volatile i64 [[DOTSINK]], i64* [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 8) ]
; ASSUMPTIONS-ON-NEXT: store volatile i64 0, ptr [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, ptr [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, ptr [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, ptr [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, ptr [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, ptr [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: store volatile i64 [[DOTSINK]], ptr [[PTR]], align 8
; ASSUMPTIONS-ON-NEXT: ret void
; ASSUMPTIONS-ON: false2:
; ASSUMPTIONS-ON-NEXT: store volatile i64 1, i64* [[PTR]], align 4
; ASSUMPTIONS-ON-NEXT: store volatile i64 1, ptr [[PTR]], align 4
; ASSUMPTIONS-ON-NEXT: br label [[COMMON_RET]]
;
br i1 %c, label %true1, label %false1

true1:
%c2 = call i1 @callee1(i1 %c, i64* %ptr)
store volatile i64 -1, i64* %ptr
store volatile i64 -1, i64* %ptr
store volatile i64 -1, i64* %ptr
store volatile i64 -1, i64* %ptr
store volatile i64 -1, i64* %ptr
%c2 = call i1 @callee1(i1 %c, ptr %ptr)
store volatile i64 -1, ptr %ptr
store volatile i64 -1, ptr %ptr
store volatile i64 -1, ptr %ptr
store volatile i64 -1, ptr %ptr
store volatile i64 -1, ptr %ptr
br i1 %c2, label %true2, label %false2

false1:
store volatile i64 1, i64* %ptr
store volatile i64 1, ptr %ptr
br label %true1

true2:
store volatile i64 2, i64* %ptr
store volatile i64 2, ptr %ptr
ret void

false2:
store volatile i64 3, i64* %ptr
store volatile i64 3, ptr %ptr
ret void
}

; This test checks that alignment assumptions do not prevent SROA.
; See PR45763.

; @callee2 stores i64 0 through its sret(i64) argument. The parameter is
; declared "align 32" while the store itself only states align 8.
define internal void @callee2(i64* noalias sret(i64) align 32 %arg) {
store i64 0, i64* %arg, align 8
define internal void @callee2(ptr noalias sret(i64) align 32 %arg) {
store i64 0, ptr %arg, align 8
ret void
}

Expand All @@ -85,7 +85,7 @@ define amdgpu_kernel void @caller2() {
; CHECK-NEXT: ret void
;
%alloca = alloca i64, align 8, addrspace(5)
%cast = addrspacecast i64 addrspace(5)* %alloca to i64*
call void @callee2(i64* sret(i64) align 32 %cast)
%cast = addrspacecast ptr addrspace(5) %alloca to ptr
call void @callee2(ptr sret(i64) align 32 %cast)
ret void
}
101 changes: 40 additions & 61 deletions llvm/test/Transforms/PhaseOrdering/instcombine-sroa-inttoptr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -45,121 +45,100 @@

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

%0 = type { i32*, i32, i32, i32 }
%0 = type { ptr, i32, i32, i32 }

; @_Z3gen1S copies one field between two %0 structs: it loads the pointer held
; in field 0 of the byval argument %arg1 and stores it into field 0 of the
; sret result %arg. The remaining fields of the result are not written.
define dso_local void @_Z3gen1S(%0* noalias sret(%0) align 8 %arg, %0* byval(%0) align 8 %arg1) {
define dso_local void @_Z3gen1S(ptr noalias sret(%0) align 8 %arg, ptr byval(%0) align 8 %arg1) {
; CHECK-LABEL: @_Z3gen1S(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[TMP0:%.*]], %0* [[ARG1:%.*]], i64 0, i32 0
; CHECK-NEXT: [[I2:%.*]] = load i32*, i32** [[I]], align 8
; CHECK-NEXT: [[I3:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0
; CHECK-NEXT: store i32* [[I2]], i32** [[I3]], align 8
; CHECK-NEXT: [[I2:%.*]] = load ptr, ptr [[ARG1:%.*]], align 8
; CHECK-NEXT: store ptr [[I2]], ptr [[ARG:%.*]], align 8
; CHECK-NEXT: ret void
;
bb:
%i = getelementptr inbounds %0, %0* %arg1, i32 0, i32 0
%i2 = load i32*, i32** %i, align 8
%i3 = getelementptr inbounds %0, %0* %arg, i32 0, i32 0
store i32* %i2, i32** %i3, align 8
%i2 = load ptr, ptr %arg1, align 8
store ptr %i2, ptr %arg, align 8
ret void
}

; @_Z3foo1S uses three stack temporaries of type %0. It copies the 24-byte
; byval argument into %i1, calls @_Z3gen1S to produce %i from %i1, copies %i
; into %i2 and passes %i2 byval to @_Z7escape01S, then returns the pointer
; loaded from field 0 of %i. Lifetime markers bracket the use of %i.
; In the expected output only the %i2 alloca remains - the pointer is loaded
; straight from %arg, stored into %i2 for the escaping call, and returned.
define dso_local i32* @_Z3foo1S(%0* byval(%0) align 8 %arg) {
define dso_local ptr @_Z3foo1S(ptr byval(%0) align 8 %arg) {
; CHECK-LABEL: @_Z3foo1S(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I2:%.*]] = alloca [[TMP0:%.*]], align 8
; CHECK-NEXT: [[I1_SROA_0_0_I5_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[ARG:%.*]], i64 0, i32 0
; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load i32*, i32** [[I1_SROA_0_0_I5_SROA_IDX]], align 8
; CHECK-NEXT: [[I_SROA_0_0_I6_SROA_IDX:%.*]] = getelementptr inbounds [[TMP0]], %0* [[I2]], i64 0, i32 0
; CHECK-NEXT: store i32* [[I1_SROA_0_0_COPYLOAD]], i32** [[I_SROA_0_0_I6_SROA_IDX]], align 8
; CHECK-NEXT: tail call void @_Z7escape01S(%0* nonnull byval([[TMP0]]) align 8 [[I2]])
; CHECK-NEXT: ret i32* [[I1_SROA_0_0_COPYLOAD]]
; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr [[ARG:%.*]], align 8
; CHECK-NEXT: store ptr [[I1_SROA_0_0_COPYLOAD]], ptr [[I2]], align 8
; CHECK-NEXT: tail call void @_Z7escape01S(ptr nonnull byval([[TMP0]]) align 8 [[I2]])
; CHECK-NEXT: ret ptr [[I1_SROA_0_0_COPYLOAD]]
;
bb:
%i = alloca %0, align 8
%i1 = alloca %0, align 8
%i2 = alloca %0, align 8
%i3 = bitcast %0* %i to i8*
call void @llvm.lifetime.start.p0i8(i64 24, i8* %i3)
%i4 = bitcast %0* %i1 to i8*
%i5 = bitcast %0* %arg to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %i4, i8* align 8 %i5, i64 24, i1 false)
call void @_Z3gen1S(%0* sret(%0) align 8 %i, %0* byval(%0) align 8 %i1)
%i6 = bitcast %0* %i2 to i8*
%i7 = bitcast %0* %i to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %i6, i8* align 8 %i7, i64 24, i1 false)
call void @_Z7escape01S(%0* byval(%0) align 8 %i2)
%i8 = getelementptr inbounds %0, %0* %i, i32 0, i32 0
%i9 = load i32*, i32** %i8, align 8
%i10 = bitcast %0* %i to i8*
call void @llvm.lifetime.end.p0i8(i64 24, i8* %i10)
ret i32* %i9
call void @llvm.lifetime.start.p0(i64 24, ptr %i)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %i1, ptr align 8 %arg, i64 24, i1 false)
call void @_Z3gen1S(ptr sret(%0) align 8 %i, ptr byval(%0) align 8 %i1)
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %i2, ptr align 8 %i, i64 24, i1 false)
call void @_Z7escape01S(ptr byval(%0) align 8 %i2)
%i9 = load ptr, ptr %i, align 8
call void @llvm.lifetime.end.p0(i64 24, ptr %i)
ret ptr %i9
}

declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)

declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)

declare dso_local void @_Z7escape01S(%0* byval(%0) align 8)
declare dso_local void @_Z7escape01S(ptr byval(%0) align 8)

declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)

; Same setup as @_Z3foo1S (byval copy into %i1, @_Z3gen1S writing the sret
; slot %i), followed by a branch on @_Z4condv. Each arm and the join block
; reloads the first field of %i in the input. The expected output loads that
; field once from %arg in the entry block and reuses the value in both arms
; and in the return.
define dso_local ptr @_Z3bar1S(ptr byval(%0) align 8 %arg) {
; CHECK-LABEL: @_Z3bar1S(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I1_SROA_0_0_COPYLOAD:%.*]] = load ptr, ptr [[ARG:%.*]], align 8
; CHECK-NEXT: [[I5:%.*]] = tail call i32 @_Z4condv()
; CHECK-NEXT: [[I6_NOT:%.*]] = icmp eq i32 [[I5]], 0
; CHECK-NEXT: br i1 [[I6_NOT]], label [[BB10:%.*]], label [[BB7:%.*]]
; CHECK: bb7:
; CHECK-NEXT: tail call void @_Z5sync0v()
; CHECK-NEXT: tail call void @_Z7escape0Pi(ptr [[I1_SROA_0_0_COPYLOAD]])
; CHECK-NEXT: br label [[BB13:%.*]]
; CHECK: bb10:
; CHECK-NEXT: tail call void @_Z5sync1v()
; CHECK-NEXT: tail call void @_Z7escape1Pi(ptr [[I1_SROA_0_0_COPYLOAD]])
; CHECK-NEXT: br label [[BB13]]
; CHECK: bb13:
; CHECK-NEXT: ret ptr [[I1_SROA_0_0_COPYLOAD]]
;
bb:
  %i = alloca %0, align 8
  %i1 = alloca %0, align 8
  call void @llvm.lifetime.start.p0(i64 24, ptr %i)
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %i1, ptr align 8 %arg, i64 24, i1 false)
  call void @_Z3gen1S(ptr sret(%0) align 8 %i, ptr byval(%0) align 8 %i1)
  %i5 = call i32 @_Z4condv()
  %i6 = icmp ne i32 %i5, 0
  br i1 %i6, label %bb7, label %bb10

bb7:
  call void @_Z5sync0v()
  %i9 = load ptr, ptr %i, align 8
  call void @_Z7escape0Pi(ptr %i9)
  br label %bb13

bb10:
  call void @_Z5sync1v()
  %i12 = load ptr, ptr %i, align 8
  call void @_Z7escape1Pi(ptr %i12)
  br label %bb13

bb13:
  %i15 = load ptr, ptr %i, align 8
  call void @llvm.lifetime.end.p0(i64 24, ptr %i)
  ret ptr %i15
}

declare dso_local i32 @_Z4condv()
declare dso_local void @_Z5sync0v()
declare dso_local void @_Z7escape0Pi(i32*)
declare dso_local void @_Z7escape0Pi(ptr)
declare dso_local void @_Z5sync1v()
declare dso_local void @_Z7escape1Pi(i32*)
declare dso_local void @_Z7escape1Pi(ptr)
30 changes: 15 additions & 15 deletions llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,21 @@
; RUN: opt < %s -passes='default<O2>' -S | FileCheck %s
; RUN: opt < %s -passes='default<O3>' -S | FileCheck %s

declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
declare void @foo(i8* nocapture)
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
declare void @foo(ptr nocapture)

; With sanitize_address set, the empty lifetime range on %text must survive
; the pipeline: the expected output has a lifetime.start call immediately
; followed by a lifetime.end call.
define void @asan() sanitize_address {
entry:
  ; CHECK-LABEL: @asan(
  %text = alloca i8, align 1

  call void @llvm.lifetime.start.p0(i64 1, ptr %text)
  call void @llvm.lifetime.end.p0(i64 1, ptr %text)
  ; CHECK: call void @llvm.lifetime.start
  ; CHECK-NEXT: call void @llvm.lifetime.end

  call void @foo(ptr %text) ; Keep alloca alive

  ret void
}
Expand All @@ -31,12 +31,12 @@ entry:
; CHECK-LABEL: @hwasan(
%text = alloca i8, align 1

call void @llvm.lifetime.start.p0i8(i64 1, i8* %text)
call void @llvm.lifetime.end.p0i8(i64 1, i8* %text)
call void @llvm.lifetime.start.p0(i64 1, ptr %text)
call void @llvm.lifetime.end.p0(i64 1, ptr %text)
; CHECK: call void @llvm.lifetime.start
; CHECK-NEXT: call void @llvm.lifetime.end

call void @foo(i8* %text) ; Keep alloca alive
call void @foo(ptr %text) ; Keep alloca alive

ret void
}
Expand All @@ -46,12 +46,12 @@ entry:
; CHECK-LABEL: @msan(
%text = alloca i8, align 1

call void @llvm.lifetime.start.p0i8(i64 1, i8* %text)
call void @llvm.lifetime.end.p0i8(i64 1, i8* %text)
call void @llvm.lifetime.start.p0(i64 1, ptr %text)
call void @llvm.lifetime.end.p0(i64 1, ptr %text)
; CHECK: call void @llvm.lifetime.start
; CHECK-NEXT: call void @llvm.lifetime.end

call void @foo(i8* %text) ; Keep alloca alive
call void @foo(ptr %text) ; Keep alloca alive

ret void
}
Expand All @@ -61,11 +61,11 @@ entry:
; CHECK-LABEL: @no_asan(
%text = alloca i8, align 1

call void @llvm.lifetime.start.p0i8(i64 1, i8* %text)
call void @llvm.lifetime.end.p0i8(i64 1, i8* %text)
call void @llvm.lifetime.start.p0(i64 1, ptr %text)
call void @llvm.lifetime.end.p0(i64 1, ptr %text)
; CHECK-NOT: call void @llvm.lifetime

call void @foo(i8* %text) ; Keep alloca alive
call void @foo(ptr %text) ; Keep alloca alive

ret void
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ declare void @f0()
declare void @f1()
declare void @f2()

declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)

define void @_Z4loopi(i32 %width) {
; HOIST-LABEL: @_Z4loopi(
Expand Down Expand Up @@ -91,30 +91,28 @@ define void @_Z4loopi(i32 %width) {
entry:
%width.addr = alloca i32, align 4
%i = alloca i32, align 4
store i32 %width, i32* %width.addr, align 4
%i1 = load i32, i32* %width.addr, align 4
store i32 %width, ptr %width.addr, align 4
%i1 = load i32, ptr %width.addr, align 4
%cmp = icmp slt i32 %i1, 1
br i1 %cmp, label %if.then, label %if.end

if.then:
br label %return

if.end:
%i2 = bitcast i32* %i to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* %i2)
store i32 0, i32* %i, align 4
call void @llvm.lifetime.start.p0(i64 4, ptr %i)
store i32 0, ptr %i, align 4
br label %for.cond

for.cond:
%i3 = load i32, i32* %i, align 4
%i4 = load i32, i32* %width.addr, align 4
%i3 = load i32, ptr %i, align 4
%i4 = load i32, ptr %width.addr, align 4
%sub = sub nsw i32 %i4, 1
%cmp1 = icmp slt i32 %i3, %sub
br i1 %cmp1, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
%i5 = bitcast i32* %i to i8*
call void @llvm.lifetime.end.p0i8(i64 4, i8* %i5)
call void @llvm.lifetime.end.p0(i64 4, ptr %i)
br label %for.end

for.body:
Expand All @@ -123,9 +121,9 @@ for.body:
br label %for.inc

for.inc:
%i6 = load i32, i32* %i, align 4
%i6 = load i32, ptr %i, align 4
%inc = add nsw i32 %i6, 1
store i32 %inc, i32* %i, align 4
store i32 %inc, ptr %i, align 4
br label %for.cond

for.end:
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/Transforms/PhaseOrdering/lto-licm.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='lto<O3>' -S < %s | FileCheck %s

define void @hoist_fdiv(float* %a, float %b) {
define void @hoist_fdiv(ptr %a, float %b) {
; CHECK-LABEL: @hoist_fdiv(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_COND:%.*]]
Expand All @@ -11,10 +11,10 @@ define void @hoist_fdiv(float* %a, float %b) {
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_0]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast float [[TMP0]], [[B:%.*]]
; CHECK-NEXT: store float [[TMP1]], float* [[ARRAYIDX]], align 4
; CHECK-NEXT: store float [[TMP1]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: for.end:
Expand All @@ -33,10 +33,10 @@ for.cond.cleanup:

for.body:
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds float, float* %a, i64 %idxprom
%0 = load float, float* %arrayidx, align 4
%arrayidx = getelementptr inbounds float, ptr %a, i64 %idxprom
%0 = load float, ptr %arrayidx, align 4
%div = fdiv fast float %0, %b
store float %div, float* %arrayidx, align 4
store float %div, ptr %arrayidx, align 4
br label %for.inc

for.inc:
Expand Down
34 changes: 16 additions & 18 deletions llvm/test/Transforms/PhaseOrdering/openmp-opt-module.ll
Original file line number Diff line number Diff line change
@@ -1,40 +1,38 @@
; RUN: opt -passes='default<O2>' -pass-remarks-missed=openmp-opt < %s 2>&1 | FileCheck %s --check-prefix=MODULE
target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"

%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.ident_t = type { i32, i32, i32, i32, ptr }

@.str = private unnamed_addr constant [13 x i8] c"Alloc Shared\00", align 1

@S = external local_unnamed_addr global i8*
@S = external local_unnamed_addr global ptr

; MODULE: remark: openmp_opt_module.c:5:7: Found thread data sharing on the GPU. Expect degraded performance due to data globalization.

; Function referenced by metadata node !7 with the "kernel" tag. It brackets
; a 4-byte @__kmpc_alloc_shared allocation (passed to @use, then freed)
; between @__kmpc_target_init and @__kmpc_target_deinit. The allocation call
; carries the debug location !10 (line 5, column 7) that the expected remark
; refers to.
define void @foo() {
entry:
  %i = call i32 @__kmpc_target_init(ptr null, i1 false, i1 true, i1 true)
  %x = call ptr @__kmpc_alloc_shared(i64 4), !dbg !10
  call void @use(ptr %x)
  call void @__kmpc_free_shared(ptr %x)
  call void @__kmpc_target_deinit(ptr null, i1 false, i1 true)
  ret void
}

declare void @use(i8* %0)
declare void @use(ptr %x)

; Weak definition of the shared-allocation entry point: forwards %DataSize to
; @_Z10SafeMallocmPKc with the "Alloc Shared" string (@.str) as the message
; argument and returns the resulting pointer.
define weak ptr @__kmpc_alloc_shared(i64 %DataSize) {
entry:
  %call = call ptr @_Z10SafeMallocmPKc(i64 %DataSize, ptr @.str) #11
  ret ptr %call
}

; Function Attrs: convergent nounwind mustprogress
declare i8* @_Z10SafeMallocmPKc(i64 %size, i8* nocapture readnone %msg)
declare ptr @_Z10SafeMallocmPKc(i64 %size, ptr nocapture readnone %msg)

declare void @__kmpc_free_shared(i8*)
declare i32 @__kmpc_target_init(%struct.ident_t*, i1, i1 %use_generic_state_machine, i1)
declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
declare void @__kmpc_free_shared(ptr)
declare i32 @__kmpc_target_init(ptr, i1, i1 %use_generic_state_machine, i1)
declare void @__kmpc_target_deinit(ptr, i1, i1)

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5, !6}
Expand All @@ -47,7 +45,7 @@ declare void @__kmpc_target_deinit(%struct.ident_t*, i1, i1)
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 7, !"openmp", i32 50}
!6 = !{i32 7, !"openmp-device", i32 50}
!7 = !{void ()* @foo, !"kernel", i32 1}
!7 = !{ptr @foo, !"kernel", i32 1}
!8 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
!9 = !DISubroutineType(types: !2)
!10 = !DILocation(line: 5, column: 7, scope: !8)
30 changes: 15 additions & 15 deletions llvm/test/Transforms/PhaseOrdering/pr32544.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@
; RUN: opt -O3 -S < %s | FileCheck %s
; RUN: opt -passes='default<O3>' -S < %s | FileCheck %s

define void @foo(i1 %which, i32 %a, i32 %b, i64 *%result) {
define void @foo(i1 %which, i32 %a, i32 %b, ptr %result) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 0, [[B:%.*]]
; CHECK-NEXT: [[Z_V_P:%.*]] = select i1 [[WHICH:%.*]], i32 [[B]], i32 [[TMP0]]
; CHECK-NEXT: [[Z_V:%.*]] = add i32 [[Z_V_P]], [[A:%.*]]
; CHECK-NEXT: [[Z:%.*]] = zext i32 [[Z_V]] to i64
; CHECK-NEXT: [[C:%.*]] = load i64, i64* [[RESULT:%.*]], align 4
; CHECK-NEXT: [[C:%.*]] = load i64, ptr [[RESULT:%.*]], align 4
; CHECK-NEXT: [[VALUE:%.*]] = add i64 [[C]], [[Z]]
; CHECK-NEXT: store i64 [[VALUE]], i64* [[RESULT]], align 4
; CHECK-NEXT: store i64 [[VALUE]], ptr [[RESULT]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand All @@ -26,22 +26,22 @@ delay:

final:
%z = phi i64 [ %x2, %entry ], [ %y2, %delay ]
%c = load i64, i64* %result
%c = load i64, ptr %result
%value = add i64 %z, %c
store i64 %value, i64* %result
store i64 %value, ptr %result
ret void
}

define void @bar(i1 %which, i32 %a, i32 %b, i64 *%result) {
define void @bar(i1 %which, i32 %a, i32 %b, ptr %result) {
; CHECK-LABEL: @bar(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 0, [[B:%.*]]
; CHECK-NEXT: [[SPEC_SELECT_P:%.*]] = select i1 [[WHICH:%.*]], i32 [[B]], i32 [[TMP0]]
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = add i32 [[SPEC_SELECT_P]], [[A:%.*]]
; CHECK-NEXT: [[Z2:%.*]] = zext i32 [[SPEC_SELECT]] to i64
; CHECK-NEXT: [[C:%.*]] = load i64, i64* [[RESULT:%.*]], align 4
; CHECK-NEXT: [[C:%.*]] = load i64, ptr [[RESULT:%.*]], align 4
; CHECK-NEXT: [[VALUE:%.*]] = add i64 [[C]], [[Z2]]
; CHECK-NEXT: store i64 [[VALUE]], i64* [[RESULT]], align 4
; CHECK-NEXT: store i64 [[VALUE]], ptr [[RESULT]], align 4
; CHECK-NEXT: ret void
;
entry:
Expand All @@ -55,31 +55,31 @@ delay:
final:
%z = phi i32 [ %x, %entry ], [ %y, %delay ]
%z2 = zext i32 %z to i64
%c = load i64, i64* %result
%c = load i64, ptr %result
%value = add i64 %z2, %c
store i64 %value, i64* %result
store i64 %value, ptr %result
ret void
}

; Input is already in the form the pipeline is expected to produce:
; select between %b and -%b on %which, add %a, zero-extend to i64 and
; accumulate into the i64 at %result. The expected output mirrors the input
; instruction for instruction.
define void @foo_opt(i1 %which, i32 %a, i32 %b, ptr nocapture %result) {
; CHECK-LABEL: @foo_opt(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = sub i32 0, [[B:%.*]]
; CHECK-NEXT: [[Z_V_P:%.*]] = select i1 [[WHICH:%.*]], i32 [[B]], i32 [[TMP0]]
; CHECK-NEXT: [[Z_V:%.*]] = add i32 [[Z_V_P]], [[A:%.*]]
; CHECK-NEXT: [[Z:%.*]] = zext i32 [[Z_V]] to i64
; CHECK-NEXT: [[C:%.*]] = load i64, ptr [[RESULT:%.*]], align 4
; CHECK-NEXT: [[VALUE:%.*]] = add i64 [[C]], [[Z]]
; CHECK-NEXT: store i64 [[VALUE]], ptr [[RESULT]], align 4
; CHECK-NEXT: ret void
;
entry:
  %0 = sub i32 0, %b
  %z.v.p = select i1 %which, i32 %b, i32 %0
  %z.v = add i32 %z.v.p, %a
  %z = zext i32 %z.v to i64
  %c = load i64, ptr %result, align 4
  %value = add i64 %c, %z
  store i64 %value, ptr %result, align 4
  ret void
}
28 changes: 14 additions & 14 deletions llvm/test/Transforms/PhaseOrdering/pr36760.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,25 @@ define i64 @PR36760(i64 %a) {
entry:
%retval = alloca i64, align 8
%a.addr = alloca i64, align 8
store i64 %a, i64* %a.addr, align 8
%0 = load i64, i64* %a.addr, align 8
store i64 %a, ptr %a.addr, align 8
%0 = load i64, ptr %a.addr, align 8
%cmp = icmp slt i64 %0, 0
br i1 %cmp, label %if.then, label %if.end

if.then:
store i64 0, i64* %retval, align 8
store i64 0, ptr %retval, align 8
br label %return

if.end:
%1 = load i64, i64* %a.addr, align 8
%1 = load i64, ptr %a.addr, align 8
%shr = ashr i64 %1, 63
%2 = load i64, i64* %a.addr, align 8
%2 = load i64, ptr %a.addr, align 8
%xor = xor i64 %shr, %2
store i64 %xor, i64* %retval, align 8
store i64 %xor, ptr %retval, align 8
br label %return

return:
%3 = load i64, i64* %retval, align 8
%3 = load i64, ptr %retval, align 8
ret i64 %3
}

Expand All @@ -42,24 +42,24 @@ define i64 @PR36760_2(i64 %a) #0 {
entry:
%retval = alloca i64, align 8
%a.addr = alloca i64, align 8
store i64 %a, i64* %a.addr, align 8
%0 = load i64, i64* %a.addr, align 8
store i64 %a, ptr %a.addr, align 8
%0 = load i64, ptr %a.addr, align 8
%cmp = icmp sge i64 %0, 0
br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
store i64 0, i64* %retval, align 8
store i64 0, ptr %retval, align 8
br label %return

if.end: ; preds = %entry
%1 = load i64, i64* %a.addr, align 8
%1 = load i64, ptr %a.addr, align 8
%shr = ashr i64 %1, 63
%2 = load i64, i64* %a.addr, align 8
%2 = load i64, ptr %a.addr, align 8
%xor = xor i64 %shr, %2
store i64 %xor, i64* %retval, align 8
store i64 %xor, ptr %retval, align 8
br label %return

return: ; preds = %if.end, %if.then
%3 = load i64, i64* %retval, align 8
%3 = load i64, ptr %retval, align 8
ret i64 %3
}
38 changes: 19 additions & 19 deletions llvm/test/Transforms/PhaseOrdering/pr39282.ll
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='default<O2>' -S < %s | FileCheck %s

; Copies one i32 from %from to %to; both parameters are noalias. The input
; load and store carry no explicit alignment, and the expected output shows
; them with align 4. This helper is called from the loop body of @pr39282.
define void @copy(ptr noalias %to, ptr noalias %from) {
; CHECK-LABEL: @copy(
; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[FROM:%.*]], align 4
; CHECK-NEXT: store i32 [[X]], ptr [[TO:%.*]], align 4
; CHECK-NEXT: ret void
;
  %x = load i32, ptr %from
  store i32 %x, ptr %to
  ret void
}

; Consider that %addr1 = %addr2 + 1, in which case %addr2i and %addr1i are
; noalias within one iteration, but may alias across iterations.
define void @pr39282(i32* %addr1, i32* %addr2) {
define void @pr39282(ptr %addr1, ptr %addr2) {
; CHECK-LABEL: @pr39282(
; CHECK-NEXT: start:
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: [[X_I:%.*]] = load i32, i32* [[ADDR1:%.*]], align 4, !alias.scope !3, !noalias !0
; CHECK-NEXT: store i32 [[X_I]], i32* [[ADDR2:%.*]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR1]], i64 1
; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, i32* [[ADDR2]], i64 1
; CHECK-NEXT: [[X_I:%.*]] = load i32, ptr [[ADDR1:%.*]], align 4, !alias.scope !3, !noalias !0
; CHECK-NEXT: store i32 [[X_I]], ptr [[ADDR2:%.*]], align 4, !alias.scope !0, !noalias !3
; CHECK-NEXT: [[ADDR1I_1:%.*]] = getelementptr inbounds i32, ptr [[ADDR1]], i64 1
; CHECK-NEXT: [[ADDR2I_1:%.*]] = getelementptr inbounds i32, ptr [[ADDR2]], i64 1
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: [[X_I_1:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !7, !noalias !5
; CHECK-NEXT: store i32 [[X_I_1]], i32* [[ADDR2I_1]], align 4, !alias.scope !5, !noalias !7
; CHECK-NEXT: [[X_I_1:%.*]] = load i32, ptr [[ADDR1I_1]], align 4, !alias.scope !7, !noalias !5
; CHECK-NEXT: store i32 [[X_I_1]], ptr [[ADDR2I_1]], align 4, !alias.scope !5, !noalias !7
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: [[X_I_2:%.*]] = load i32, i32* [[ADDR1]], align 4, !alias.scope !11, !noalias !9
; CHECK-NEXT: store i32 [[X_I_2]], i32* [[ADDR2]], align 4, !alias.scope !9, !noalias !11
; CHECK-NEXT: [[X_I_2:%.*]] = load i32, ptr [[ADDR1]], align 4, !alias.scope !11, !noalias !9
; CHECK-NEXT: store i32 [[X_I_2]], ptr [[ADDR2]], align 4, !alias.scope !9, !noalias !11
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl
; CHECK-NEXT: [[X_I_3:%.*]] = load i32, i32* [[ADDR1I_1]], align 4, !alias.scope !15, !noalias !13
; CHECK-NEXT: store i32 [[X_I_3]], i32* [[ADDR2I_1]], align 4, !alias.scope !13, !noalias !15
; CHECK-NEXT: [[X_I_3:%.*]] = load i32, ptr [[ADDR1I_1]], align 4, !alias.scope !15, !noalias !13
; CHECK-NEXT: store i32 [[X_I_3]], ptr [[ADDR2I_1]], align 4, !alias.scope !13, !noalias !15
; CHECK-NEXT: ret void
;
start:
Expand All @@ -43,9 +43,9 @@ start:
body:
%i = phi i32 [ 0, %start ], [ %i.next, %body ]
%j = and i32 %i, 1
%addr1i = getelementptr inbounds i32, i32* %addr1, i32 %j
%addr2i = getelementptr inbounds i32, i32* %addr2, i32 %j
call void @copy(i32* %addr2i, i32* %addr1i)
%addr1i = getelementptr inbounds i32, ptr %addr1, i32 %j
%addr2i = getelementptr inbounds i32, ptr %addr2, i32 %j
call void @copy(ptr %addr2i, ptr %addr1i)
%i.next = add i32 %i, 1
%cmp = icmp slt i32 %i.next, 4
br i1 %cmp, label %body, label %end
Expand Down
8 changes: 3 additions & 5 deletions llvm/test/Transforms/PhaseOrdering/pr40750.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,17 @@

%struct.test = type { i8, [3 x i8] }

define i32 @get(%struct.test* nocapture readonly %arg) {
define i32 @get(ptr nocapture readonly %arg) {
; CHECK-LABEL: @get(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT_TEST:%.*]], %struct.test* [[ARG:%.*]], i64 0, i32 0
; CHECK-NEXT: [[I1:%.*]] = load i8, i8* [[I]], align 4
; CHECK-NEXT: [[I1:%.*]] = load i8, ptr [[ARG:%.*]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[I1]], 3
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 [[TMP0]], 0
; CHECK-NEXT: [[I9:%.*]] = zext i1 [[TMP1]] to i32
; CHECK-NEXT: ret i32 [[I9]]
;
bb:
%i = getelementptr inbounds %struct.test, %struct.test* %arg, i64 0, i32 0
%i1 = load i8, i8* %i, align 4
%i1 = load i8, ptr %arg, align 4
%i2 = and i8 %i1, 1
%i3 = icmp eq i8 %i2, 0
br i1 %i3, label %bb4, label %bb8
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/Transforms/PhaseOrdering/pr45682.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ define void @PR45682(i32 %x, i32 %y) {
entry:
%x.addr = alloca i32, align 4
%y.addr = alloca i32, align 4
store i32 %x, i32* %x.addr, align 4
store i32 %y, i32* %y.addr, align 4
%0 = load i32, i32* %y.addr, align 4
store i32 %x, ptr %x.addr, align 4
store i32 %y, ptr %y.addr, align 4
%0 = load i32, ptr %y.addr, align 4
%cmp = icmp sgt i32 %0, 0
call void @llvm.assume(i1 %cmp)
%1 = load i32, i32* %y.addr, align 4
%2 = load i32, i32* %x.addr, align 4
%1 = load i32, ptr %y.addr, align 4
%2 = load i32, ptr %x.addr, align 4
%add = add nsw i32 %2, %1
store i32 %add, i32* %x.addr, align 4
%3 = load i32, i32* %x.addr, align 4
store i32 %add, ptr %x.addr, align 4
%3 = load i32, ptr %x.addr, align 4
%cmp1 = icmp eq i32 %3, -2147483648
br i1 %cmp1, label %if.then, label %if.end

Expand Down
30 changes: 15 additions & 15 deletions llvm/test/Transforms/PhaseOrdering/reassociate-after-unroll.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,37 +16,37 @@ entry:
%k = alloca i32, align 4
%g = alloca i64, align 8
%i = alloca i64, align 8
store i64 %blah, i64* %blah.addr, align 8
store i64 %limit, i64* %limit.addr, align 8
store i32 1, i32* %k, align 4
store i64 0, i64* %i, align 8
store i64 %blah, ptr %blah.addr, align 8
store i64 %limit, ptr %limit.addr, align 8
store i32 1, ptr %k, align 4
store i64 0, ptr %i, align 8
br label %for.cond

for.cond:
%0 = load i64, i64* %i, align 8
%1 = load i64, i64* %limit.addr, align 8
%0 = load i64, ptr %i, align 8
%1 = load i64, ptr %limit.addr, align 8
%cmp = icmp ult i64 %0, %1
br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:
%2 = load i64, i64* %g, align 8
%2 = load i64, ptr %g, align 8
ret i64 %2

for.body:
%3 = load i64, i64* %blah.addr, align 8
%4 = load i32, i32* %k, align 4
%3 = load i64, ptr %blah.addr, align 8
%4 = load i32, ptr %k, align 4
%conv = zext i32 %4 to i64
%and = and i64 %conv, %3
%conv1 = trunc i64 %and to i32
store i32 %conv1, i32* %k, align 4
%5 = load i32, i32* %k, align 4
store i32 %conv1, ptr %k, align 4
%5 = load i32, ptr %k, align 4
%conv2 = zext i32 %5 to i64
%6 = load i64, i64* %g, align 8
%6 = load i64, ptr %g, align 8
%add = add i64 %6, %conv2
store i64 %add, i64* %g, align 8
%7 = load i64, i64* %i, align 8
store i64 %add, ptr %g, align 8
%7 = load i64, ptr %i, align 8
%inc = add i64 %7, 1
store i64 %inc, i64* %i, align 8
store i64 %inc, ptr %i, align 8
br label %for.cond
}

Expand Down
64 changes: 29 additions & 35 deletions llvm/test/Transforms/PhaseOrdering/recompute-globalsaa.ll
Original file line number Diff line number Diff line change
@@ -1,50 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='default<O2>' -S < %s | FileCheck %s
@a = internal global i32 0, align 4
@c = internal global i32** @b, align 8
@b = internal global i32* null, align 8
@e = internal global i32* @d, align 8
@c = internal global ptr @b, align 8
@b = internal global ptr null, align 8
@e = internal global ptr @d, align 8
@d = internal global i32 0, align 4

; Stores @a into three local slots, writes 1 through one of them, publishes
; @a through the pointer held in @c, then calls @bar with the value reloaded
; from %h. The expected output contains no call to @bar and no access to @a:
; only the store of 0 through the pointer loaded from @e and the null store
; to @e remain before returning 0.
define i32 @main() {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @e, align 8
; CHECK-NEXT: store i32 0, ptr [[TMP0]], align 4
; CHECK-NEXT: store ptr null, ptr @e, align 8
; CHECK-NEXT: ret i32 0
;
entry:
  %h = alloca ptr, align 8
  %i = alloca ptr, align 8
  %j = alloca ptr, align 8
  store ptr @a, ptr %h, align 8
  store ptr @a, ptr %j, align 8
  %0 = load ptr, ptr %j, align 8
  store i32 1, ptr %0, align 4
  store ptr @a, ptr %i, align 8
  %1 = load ptr, ptr %i, align 8
  %2 = load ptr, ptr @c, align 8
  store ptr %1, ptr %2, align 8
  %3 = load ptr, ptr %h, align 8
  %call = call ptr @bar(ptr %3)
  ret i32 0
}

define internal i32* @bar(i32* %g) {
define internal ptr @bar(ptr %g) {
entry:
%g.addr = alloca i32*, align 8
store i32* %g, i32** %g.addr, align 8
%0 = load i32*, i32** @e, align 8
store i32 0, i32* %0, align 4
%1 = load i32*, i32** %g.addr, align 8
%2 = load i32, i32* %1, align 4
%g.addr = alloca ptr, align 8
store ptr %g, ptr %g.addr, align 8
%0 = load ptr, ptr @e, align 8
store i32 0, ptr %0, align 4
%1 = load ptr, ptr %g.addr, align 8
%2 = load i32, ptr %1, align 4
%tobool = icmp ne i32 %2, 0
br i1 %tobool, label %if.end, label %if.then

Expand All @@ -53,9 +47,9 @@ if.then: ; preds = %entry
br label %if.end

if.end: ; preds = %if.then, %entry
store i32* null, i32** @e, align 8
%3 = load i32*, i32** @b, align 8
ret i32* %3
store ptr null, ptr @e, align 8
%3 = load ptr, ptr @b, align 8
ret ptr %3
}

declare void @foo()
Expand Down
54 changes: 27 additions & 27 deletions llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,22 +11,22 @@ target datalayout = "e-m:m-p:40:64:64:32-i32:32-i16:16-i8:8-n32"
; The loop body contains two increments by %div.
; Make sure that 2*%div is recognizable, and not expressed as a bit mask of %d.
; CHECK: --> {%p,+,(8 * (%d /u 4))}
define void @test1(i32 %d, i32* %p) nounwind uwtable ssp {
define void @test1(i32 %d, ptr %p) nounwind uwtable ssp {
entry:
%div = udiv i32 %d, 4
br label %for.cond

for.cond: ; preds = %for.inc, %entry
%p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
%p.addr.0 = phi ptr [ %p, %entry ], [ %add.ptr1, %for.inc ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%cmp = icmp ne i32 %i.0, 64
br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond
store i32 0, i32* %p.addr.0, align 4
%add.ptr = getelementptr inbounds i32, i32* %p.addr.0, i32 %div
store i32 1, i32* %add.ptr, align 4
%add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %div
store i32 0, ptr %p.addr.0, align 4
%add.ptr = getelementptr inbounds i32, ptr %p.addr.0, i32 %div
store i32 1, ptr %add.ptr, align 4
%add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i32 %div
br label %for.inc

for.inc: ; preds = %for.body
Expand All @@ -40,22 +40,22 @@ for.end: ; preds = %for.cond
; CHECK: test1a
; Same thing as test1, but it is even more tempting to fold 2 * (%d /u 2)
; CHECK: --> {%p,+,(8 * (%d /u 2))}
define void @test1a(i32 %d, i32* %p) nounwind uwtable ssp {
define void @test1a(i32 %d, ptr %p) nounwind uwtable ssp {
entry:
%div = udiv i32 %d, 2
br label %for.cond

for.cond: ; preds = %for.inc, %entry
%p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
%p.addr.0 = phi ptr [ %p, %entry ], [ %add.ptr1, %for.inc ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%cmp = icmp ne i32 %i.0, 64
br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond
store i32 0, i32* %p.addr.0, align 4
%add.ptr = getelementptr inbounds i32, i32* %p.addr.0, i32 %div
store i32 1, i32* %add.ptr, align 4
%add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i32 %div
store i32 0, ptr %p.addr.0, align 4
%add.ptr = getelementptr inbounds i32, ptr %p.addr.0, i32 %div
store i32 1, ptr %add.ptr, align 4
%add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i32 %div
br label %for.inc

for.inc: ; preds = %for.body
Expand All @@ -66,7 +66,7 @@ for.end: ; preds = %for.cond
ret void
}

@array = weak global [101 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=1]
@array = weak global [101 x i32] zeroinitializer, align 32 ; <ptr> [#uses=1]

; CHECK: Loop %bb: backedge-taken count is 100

Expand All @@ -76,8 +76,8 @@ entry:

bb: ; preds = %bb, %entry
%i.01.0 = phi i32 [ 100, %entry ], [ %tmp4, %bb ] ; <i32> [#uses=2]
%tmp1 = getelementptr [101 x i32], [101 x i32]* @array, i32 0, i32 %i.01.0 ; <i32*> [#uses=1]
store i32 %x, i32* %tmp1
%tmp1 = getelementptr [101 x i32], ptr @array, i32 0, i32 %i.01.0 ; <ptr> [#uses=1]
store i32 %x, ptr %tmp1
%tmp4 = add i32 %i.01.0, -1 ; <i32> [#uses=2]
%tmp7 = icmp sgt i32 %tmp4, -1 ; <i1> [#uses=1]
br i1 %tmp7, label %bb, label %return
Expand All @@ -86,17 +86,17 @@ return: ; preds = %bb
ret void
}

define i32 @test_loop_idiom_recogize(i32 %x, i32 %y, i32* %lam, i32* %alp) nounwind {
define i32 @test_loop_idiom_recogize(i32 %x, i32 %y, ptr %lam, ptr %alp) nounwind {
bb1.thread:
br label %bb1

bb1: ; preds = %bb1, %bb1.thread
%indvar = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ] ; <i32> [#uses=4]
%i.0.reg2mem.0 = sub i32 255, %indvar ; <i32> [#uses=2]
%0 = getelementptr i32, i32* %alp, i32 %i.0.reg2mem.0 ; <i32*> [#uses=1]
%1 = load i32, i32* %0, align 4 ; <i32> [#uses=1]
%2 = getelementptr i32, i32* %lam, i32 %i.0.reg2mem.0 ; <i32*> [#uses=1]
store i32 %1, i32* %2, align 4
%0 = getelementptr i32, ptr %alp, i32 %i.0.reg2mem.0 ; <ptr> [#uses=1]
%1 = load i32, ptr %0, align 4 ; <i32> [#uses=1]
%2 = getelementptr i32, ptr %lam, i32 %i.0.reg2mem.0 ; <ptr> [#uses=1]
store i32 %1, ptr %2, align 4
%3 = sub i32 254, %indvar ; <i32> [#uses=1]
%4 = icmp slt i32 %3, 0 ; <i1> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
Expand All @@ -117,18 +117,18 @@ declare void @llvm.experimental.guard(i1, ...)
; CHECK: Loop %loop: Unpredictable constant max backedge-taken count.
define void @test_range_ref1(i8 %t) {
entry:
%t.ptr = inttoptr i8 %t to i8*
%p.42 = inttoptr i8 42 to i8*
%cmp1 = icmp slt i8* %t.ptr, %p.42
%t.ptr = inttoptr i8 %t to ptr
%p.42 = inttoptr i8 42 to ptr
%cmp1 = icmp slt ptr %t.ptr, %p.42
call void(i1, ...) @llvm.experimental.guard(i1 %cmp1) [ "deopt"() ]
br label %loop

loop:
%idx = phi i8* [ %t.ptr, %entry ], [ %snext, %loop ]
%snext = getelementptr inbounds i8, i8* %idx, i64 1
%c = icmp slt i8* %idx, %p.42
%idx = phi ptr [ %t.ptr, %entry ], [ %snext, %loop ]
%snext = getelementptr inbounds i8, ptr %idx, i64 1
%c = icmp slt ptr %idx, %p.42
call void @use(i1 %c)
%be = icmp slt i8* %snext, %p.42
%be = icmp slt ptr %snext, %p.42
br i1 %be, label %loop, label %exit

exit:
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/Transforms/PhaseOrdering/scev.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,22 @@
; The loop body contains two increments by %div.
; Make sure that 2*%div is recognizable, and not expressed as a bit mask of %d.
; CHECK: --> {%p,+,(8 * (%d /u 4))}
define void @test1(i64 %d, i32* %p) nounwind uwtable ssp {
define void @test1(i64 %d, ptr %p) nounwind uwtable ssp {
entry:
%div = udiv i64 %d, 4
br label %for.cond

for.cond: ; preds = %for.inc, %entry
%p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
%p.addr.0 = phi ptr [ %p, %entry ], [ %add.ptr1, %for.inc ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%cmp = icmp ne i32 %i.0, 64
br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond
store i32 0, i32* %p.addr.0, align 4
%add.ptr = getelementptr inbounds i32, i32* %p.addr.0, i64 %div
store i32 1, i32* %add.ptr, align 4
%add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i64 %div
store i32 0, ptr %p.addr.0, align 4
%add.ptr = getelementptr inbounds i32, ptr %p.addr.0, i64 %div
store i32 1, ptr %add.ptr, align 4
%add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i64 %div
br label %for.inc

for.inc: ; preds = %for.body
Expand All @@ -37,22 +37,22 @@ for.end: ; preds = %for.cond
; CHECK: test1a
; Same thing as test1, but it is even more tempting to fold 2 * (%d /u 2)
; CHECK: --> {%p,+,(8 * (%d /u 2))}
define void @test1a(i64 %d, i32* %p) nounwind uwtable ssp {
define void @test1a(i64 %d, ptr %p) nounwind uwtable ssp {
entry:
%div = udiv i64 %d, 2
br label %for.cond

for.cond: ; preds = %for.inc, %entry
%p.addr.0 = phi i32* [ %p, %entry ], [ %add.ptr1, %for.inc ]
%p.addr.0 = phi ptr [ %p, %entry ], [ %add.ptr1, %for.inc ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%cmp = icmp ne i32 %i.0, 64
br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond
store i32 0, i32* %p.addr.0, align 4
%add.ptr = getelementptr inbounds i32, i32* %p.addr.0, i64 %div
store i32 1, i32* %add.ptr, align 4
%add.ptr1 = getelementptr inbounds i32, i32* %add.ptr, i64 %div
store i32 0, ptr %p.addr.0, align 4
%add.ptr = getelementptr inbounds i32, ptr %p.addr.0, i64 %div
store i32 1, ptr %add.ptr, align 4
%add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i64 %div
br label %for.inc

for.inc: ; preds = %for.body
Expand Down
Loading