252 changes: 125 additions & 127 deletions llvm/test/Transforms/CodeGenPrepare/ARM/sink-addrmode.ll

Large diffs are not rendered by default.

44 changes: 18 additions & 26 deletions llvm/test/Transforms/CodeGenPrepare/ARM/sinkchain-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,22 @@
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp < %s -codegenprepare -cgpp-huge-func=0 -S | FileCheck -check-prefix=CHECK %s

; Sink the shufflevector/insertelement pair, followed by the trunc. The sunk instructions end up dead.
define signext i8 @dead(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8* noalias nocapture %d, i32 %n) {
define signext i8 @dead(ptr noalias nocapture readonly %s1, i16 zeroext %x, ptr noalias nocapture %d, i32 %n) {
; CHECK-LABEL: @dead(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N:%.*]], -8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[X:%.*]] to i8
; CHECK-NEXT: [[L6:%.*]] = getelementptr inbounds i16, i16* [[S1:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[L7:%.*]] = bitcast i16* [[L6]] to <8 x i16>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[L7]], align 2
; CHECK-NEXT: [[L6:%.*]] = getelementptr inbounds i16, ptr [[S1:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[L6]], align 2
; CHECK-NEXT: [[L8:%.*]] = trunc <8 x i16> [[WIDE_LOAD]] to <8 x i8>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[L9:%.*]] = mul <8 x i8> [[TMP2]], [[L8]]
; CHECK-NEXT: [[L13:%.*]] = getelementptr inbounds i8, i8* [[D:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[L14:%.*]] = bitcast i8* [[L13]] to <8 x i8>*
; CHECK-NEXT: store <8 x i8> [[L9]], <8 x i8>* [[L14]], align 1
; CHECK-NEXT: [[L13:%.*]] = getelementptr inbounds i8, ptr [[D:%.*]], i32 [[INDEX]]
; CHECK-NEXT: store <8 x i8> [[L9]], ptr [[L13]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[L15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[L15]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
Expand All @@ -36,14 +34,12 @@ entry:

vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%l6 = getelementptr inbounds i16, i16* %s1, i32 %index
%l7 = bitcast i16* %l6 to <8 x i16>*
%wide.load = load <8 x i16>, <8 x i16>* %l7, align 2
%l6 = getelementptr inbounds i16, ptr %s1, i32 %index
%wide.load = load <8 x i16>, ptr %l6, align 2
%l8 = trunc <8 x i16> %wide.load to <8 x i8>
%l9 = mul <8 x i8> %broadcast.splat26, %l8
%l13 = getelementptr inbounds i8, i8* %d, i32 %index
%l14 = bitcast i8* %l13 to <8 x i8>*
store <8 x i8> %l9, <8 x i8>* %l14, align 1
%l13 = getelementptr inbounds i8, ptr %d, i32 %index
store <8 x i8> %l9, ptr %l13, align 1
%index.next = add i32 %index, 8
%l15 = icmp eq i32 %index.next, %n.vec
br i1 %l15, label %exit, label %vector.body
Expand All @@ -53,7 +49,7 @@ exit: ; preds = %vector.body
}

; Same as above, but the shuffle has an extra use, meaning it shouldn't be deleted.
define signext i8 @alive(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8* noalias nocapture %d, i32 %n) {
define signext i8 @alive(ptr noalias nocapture readonly %s1, i16 zeroext %x, ptr noalias nocapture %d, i32 %n) {
; CHECK-LABEL: @alive(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N:%.*]], -8
Expand All @@ -65,16 +61,14 @@ define signext i8 @alive(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[X]] to i8
; CHECK-NEXT: [[L6:%.*]] = getelementptr inbounds i16, i16* [[S1:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[L7:%.*]] = bitcast i16* [[L6]] to <8 x i16>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[L7]], align 2
; CHECK-NEXT: [[L6:%.*]] = getelementptr inbounds i16, ptr [[S1:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[L6]], align 2
; CHECK-NEXT: [[L8:%.*]] = trunc <8 x i16> [[WIDE_LOAD]] to <8 x i8>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <8 x i32> zeroinitializer
; CHECK-NEXT: [[L9:%.*]] = mul <8 x i8> [[TMP2]], [[L8]]
; CHECK-NEXT: [[L13:%.*]] = getelementptr inbounds i8, i8* [[D:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[L14:%.*]] = bitcast i8* [[L13]] to <8 x i8>*
; CHECK-NEXT: store <8 x i8> [[L9]], <8 x i8>* [[L14]], align 1
; CHECK-NEXT: [[L13:%.*]] = getelementptr inbounds i8, ptr [[D:%.*]], i32 [[INDEX]]
; CHECK-NEXT: store <8 x i8> [[L9]], ptr [[L13]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[L15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[L15]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
Expand All @@ -91,14 +85,12 @@ entry:

vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%l6 = getelementptr inbounds i16, i16* %s1, i32 %index
%l7 = bitcast i16* %l6 to <8 x i16>*
%wide.load = load <8 x i16>, <8 x i16>* %l7, align 2
%l6 = getelementptr inbounds i16, ptr %s1, i32 %index
%wide.load = load <8 x i16>, ptr %l6, align 2
%l8 = trunc <8 x i16> %wide.load to <8 x i8>
%l9 = mul <8 x i8> %broadcast.splat26, %l8
%l13 = getelementptr inbounds i8, i8* %d, i32 %index
%l14 = bitcast i8* %l13 to <8 x i8>*
store <8 x i8> %l9, <8 x i8>* %l14, align 1
%l13 = getelementptr inbounds i8, ptr %d, i32 %index
store <8 x i8> %l9, ptr %l13, align 1
%index.next = add i32 %index, 8
%l15 = icmp eq i32 %index.next, %n.vec
br i1 %l15, label %exit, label %vector.body
Expand Down
44 changes: 18 additions & 26 deletions llvm/test/Transforms/CodeGenPrepare/ARM/sinkchain.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,22 @@
; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp < %s -codegenprepare -cgpp-huge-func=0 -S | FileCheck -check-prefix=CHECK %s

; Sink the shufflevector/insertelement pair, followed by the trunc. The sunk instructions end up dead.
define signext i8 @dead(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8* noalias nocapture %d, i32 %n) {
define signext i8 @dead(ptr noalias nocapture readonly %s1, i16 zeroext %x, ptr noalias nocapture %d, i32 %n) {
; CHECK-LABEL: @dead(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N:%.*]], -8
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[X:%.*]] to i8
; CHECK-NEXT: [[L6:%.*]] = getelementptr inbounds i16, i16* [[S1:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[L7:%.*]] = bitcast i16* [[L6]] to <8 x i16>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[L7]], align 2
; CHECK-NEXT: [[L6:%.*]] = getelementptr inbounds i16, ptr [[S1:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[L6]], align 2
; CHECK-NEXT: [[L8:%.*]] = trunc <8 x i16> [[WIDE_LOAD]] to <8 x i8>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[L9:%.*]] = mul <8 x i8> [[TMP2]], [[L8]]
; CHECK-NEXT: [[L13:%.*]] = getelementptr inbounds i8, i8* [[D:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[L14:%.*]] = bitcast i8* [[L13]] to <8 x i8>*
; CHECK-NEXT: store <8 x i8> [[L9]], <8 x i8>* [[L14]], align 1
; CHECK-NEXT: [[L13:%.*]] = getelementptr inbounds i8, ptr [[D:%.*]], i32 [[INDEX]]
; CHECK-NEXT: store <8 x i8> [[L9]], ptr [[L13]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[L15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[L15]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
Expand All @@ -36,14 +34,12 @@ entry:

vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%l6 = getelementptr inbounds i16, i16* %s1, i32 %index
%l7 = bitcast i16* %l6 to <8 x i16>*
%wide.load = load <8 x i16>, <8 x i16>* %l7, align 2
%l6 = getelementptr inbounds i16, ptr %s1, i32 %index
%wide.load = load <8 x i16>, ptr %l6, align 2
%l8 = trunc <8 x i16> %wide.load to <8 x i8>
%l9 = mul <8 x i8> %broadcast.splat26, %l8
%l13 = getelementptr inbounds i8, i8* %d, i32 %index
%l14 = bitcast i8* %l13 to <8 x i8>*
store <8 x i8> %l9, <8 x i8>* %l14, align 1
%l13 = getelementptr inbounds i8, ptr %d, i32 %index
store <8 x i8> %l9, ptr %l13, align 1
%index.next = add i32 %index, 8
%l15 = icmp eq i32 %index.next, %n.vec
br i1 %l15, label %exit, label %vector.body
Expand All @@ -53,7 +49,7 @@ exit: ; preds = %vector.body
}

; Same as above, but the shuffle has an extra use, meaning it shouldn't be deleted.
define signext i8 @alive(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8* noalias nocapture %d, i32 %n) {
define signext i8 @alive(ptr noalias nocapture readonly %s1, i16 zeroext %x, ptr noalias nocapture %d, i32 %n) {
; CHECK-LABEL: @alive(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[N_VEC:%.*]] = and i32 [[N:%.*]], -8
Expand All @@ -65,16 +61,14 @@ define signext i8 @alive(i16* noalias nocapture readonly %s1, i16 zeroext %x, i8
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = trunc i16 [[X]] to i8
; CHECK-NEXT: [[L6:%.*]] = getelementptr inbounds i16, i16* [[S1:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[L7:%.*]] = bitcast i16* [[L6]] to <8 x i16>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[L7]], align 2
; CHECK-NEXT: [[L6:%.*]] = getelementptr inbounds i16, ptr [[S1:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, ptr [[L6]], align 2
; CHECK-NEXT: [[L8:%.*]] = trunc <8 x i16> [[WIDE_LOAD]] to <8 x i8>
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> undef, <8 x i32> zeroinitializer
; CHECK-NEXT: [[L9:%.*]] = mul <8 x i8> [[TMP2]], [[L8]]
; CHECK-NEXT: [[L13:%.*]] = getelementptr inbounds i8, i8* [[D:%.*]], i32 [[INDEX]]
; CHECK-NEXT: [[L14:%.*]] = bitcast i8* [[L13]] to <8 x i8>*
; CHECK-NEXT: store <8 x i8> [[L9]], <8 x i8>* [[L14]], align 1
; CHECK-NEXT: [[L13:%.*]] = getelementptr inbounds i8, ptr [[D:%.*]], i32 [[INDEX]]
; CHECK-NEXT: store <8 x i8> [[L9]], ptr [[L13]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8
; CHECK-NEXT: [[L15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[L15]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
Expand All @@ -91,14 +85,12 @@ entry:

vector.body: ; preds = %vector.body, %entry
%index = phi i32 [ 0, %entry ], [ %index.next, %vector.body ]
%l6 = getelementptr inbounds i16, i16* %s1, i32 %index
%l7 = bitcast i16* %l6 to <8 x i16>*
%wide.load = load <8 x i16>, <8 x i16>* %l7, align 2
%l6 = getelementptr inbounds i16, ptr %s1, i32 %index
%wide.load = load <8 x i16>, ptr %l6, align 2
%l8 = trunc <8 x i16> %wide.load to <8 x i8>
%l9 = mul <8 x i8> %broadcast.splat26, %l8
%l13 = getelementptr inbounds i8, i8* %d, i32 %index
%l14 = bitcast i8* %l13 to <8 x i8>*
store <8 x i8> %l9, <8 x i8>* %l14, align 1
%l13 = getelementptr inbounds i8, ptr %d, i32 %index
store <8 x i8> %l9, ptr %l13, align 1
%index.next = add i32 %index, 8
%l15 = icmp eq i32 %index.next, %n.vec
br i1 %l15, label %exit, label %vector.body
Expand Down
28 changes: 13 additions & 15 deletions llvm/test/Transforms/CodeGenPrepare/ARM/splitgep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,32 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
target triple = "thumbv6m-arm-none-eabi"

; Check that we have deterministic output
define void @test([65536 x i32]** %sp, [65536 x i32]* %t, i32 %n) {
define void @test(ptr %sp, ptr %t, i32 %n) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
; CHECK-NEXT: %0 = bitcast [65536 x i32]* %t to i8*
; CHECK-NEXT: %splitgep1 = getelementptr i8, i8* %0, i32 80000
; CHECK-NEXT: %s = load [65536 x i32]*, [65536 x i32]** %sp
; CHECK-NEXT: %1 = bitcast [65536 x i32]* %s to i8*
; CHECK-NEXT: %splitgep = getelementptr i8, i8* %1, i32 80000
; CHECK-NEXT: %splitgep1 = getelementptr i8, ptr %t, i32 80000
; CHECK-NEXT: %s = load ptr, ptr %sp
; CHECK-NEXT: %splitgep = getelementptr i8, ptr %s, i32 80000
entry:
%s = load [65536 x i32]*, [65536 x i32]** %sp
%s = load ptr, ptr %sp
br label %while_cond

while_cond:
%phi = phi i32 [ 0, %entry ], [ %i, %while_body ]
%gep0 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20000
%gep1 = getelementptr [65536 x i32], [65536 x i32]* %s, i64 0, i32 20001
%gep2 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20000
%gep3 = getelementptr [65536 x i32], [65536 x i32]* %t, i64 0, i32 20001
%gep0 = getelementptr [65536 x i32], ptr %s, i64 0, i32 20000
%gep1 = getelementptr [65536 x i32], ptr %s, i64 0, i32 20001
%gep2 = getelementptr [65536 x i32], ptr %t, i64 0, i32 20000
%gep3 = getelementptr [65536 x i32], ptr %t, i64 0, i32 20001
%cmp = icmp slt i32 %phi, %n
br i1 %cmp, label %while_body, label %while_end

while_body:
%i = add i32 %phi, 1
%j = add i32 %phi, 2
store i32 %i, i32* %gep0
store i32 %phi, i32* %gep1
store i32 %i, i32* %gep2
store i32 %phi, i32* %gep3
store i32 %i, ptr %gep0
store i32 %phi, ptr %gep1
store i32 %i, ptr %gep2
store i32 %phi, ptr %gep3
br label %while_cond

while_end:
Expand Down
82 changes: 40 additions & 42 deletions llvm/test/Transforms/CodeGenPrepare/ARM/tailcall-dup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,83 +2,81 @@

target triple = "armv8m.main-none-eabi"

declare i8* @f0()
declare i8* @f1()
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) nounwind
declare ptr @f0()
declare ptr @f1()
declare void @llvm.lifetime.start.p0(i64, ptr nocapture) nounwind
declare void @llvm.lifetime.end.p0(i64, ptr nocapture) nounwind

define i8* @tail_dup() {
define ptr @tail_dup() {
; CHECK-LABEL: tail_dup
; CHECK: tail call i8* @f0()
; CHECK-NEXT: ret i8*
; CHECK: tail call i8* @f1()
; CHECK-NEXT: ret i8*
; CHECK: tail call ptr @f0()
; CHECK-NEXT: ret ptr
; CHECK: tail call ptr @f1()
; CHECK-NEXT: ret ptr
bb0:
%a = alloca i32
%a1 = bitcast i32* %a to i8*
call void @llvm.lifetime.start.p0i8(i64 -1, i8* %a1) nounwind
%tmp0 = tail call i8* @f0()
call void @llvm.lifetime.start.p0(i64 -1, ptr %a) nounwind
%tmp0 = tail call ptr @f0()
br label %return
bb1:
%tmp1 = tail call i8* @f1()
%tmp1 = tail call ptr @f1()
br label %return
return:
%retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
%a2 = bitcast i32* %a to i8*
call void @llvm.lifetime.end.p0i8(i64 -1, i8* %a2) nounwind
ret i8* %retval
%retval = phi ptr [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
call void @llvm.lifetime.end.p0(i64 -1, ptr %a) nounwind
ret ptr %retval
}

define nonnull i8* @nonnull_dup() {
define nonnull ptr @nonnull_dup() {
; CHECK-LABEL: nonnull_dup
; CHECK: tail call i8* @f0()
; CHECK-NEXT: ret i8*
; CHECK: tail call i8* @f1()
; CHECK-NEXT: ret i8*
; CHECK: tail call ptr @f0()
; CHECK-NEXT: ret ptr
; CHECK: tail call ptr @f1()
; CHECK-NEXT: ret ptr
bb0:
%tmp0 = tail call i8* @f0()
%tmp0 = tail call ptr @f0()
br label %return
bb1:
%tmp1 = tail call i8* @f1()
%tmp1 = tail call ptr @f1()
br label %return
return:
%retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
ret i8* %retval
%retval = phi ptr [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
ret ptr %retval
}

define i8* @noalias_dup() {
define ptr @noalias_dup() {
; CHECK-LABEL: noalias_dup
; CHECK: tail call noalias i8* @f0()
; CHECK-NEXT: ret i8*
; CHECK: tail call noalias i8* @f1()
; CHECK-NEXT: ret i8*
; CHECK: tail call noalias ptr @f0()
; CHECK-NEXT: ret ptr
; CHECK: tail call noalias ptr @f1()
; CHECK-NEXT: ret ptr
bb0:
%tmp0 = tail call noalias i8* @f0()
%tmp0 = tail call noalias ptr @f0()
br label %return
bb1:
%tmp1 = tail call noalias i8* @f1()
%tmp1 = tail call noalias ptr @f1()
br label %return
return:
%retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
ret i8* %retval
%retval = phi ptr [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
ret ptr %retval
}

; Use inreg as a way of testing that attributes (other than nonnull and
; noalias) disable the tailcall duplication in cgp.

define inreg i8* @inreg_nodup() {
define inreg ptr @inreg_nodup() {
; CHECK-LABEL: inreg_nodup
; CHECK: tail call i8* @f0()
; CHECK: tail call ptr @f0()
; CHECK-NEXT: br label %return
; CHECK: tail call i8* @f1()
; CHECK: tail call ptr @f1()
; CHECK-NEXT: br label %return
bb0:
%tmp0 = tail call i8* @f0()
%tmp0 = tail call ptr @f0()
br label %return
bb1:
%tmp1 = tail call i8* @f1()
%tmp1 = tail call ptr @f1()
br label %return
return:
%retval = phi i8* [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
ret i8* %retval
%retval = phi ptr [ %tmp0, %bb0 ], [ %tmp1, %bb1 ]
ret ptr %retval
}
50 changes: 24 additions & 26 deletions llvm/test/Transforms/CodeGenPrepare/Mips/pr35209.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,60 +5,58 @@
; sunken address is not reused if the same address computation occurs
; after the select. Previously, this caused an ICE.

%struct.az = type { i32, %struct.bt* }
%struct.az = type { i32, ptr }
%struct.bt = type { i32 }
%struct.f = type { %struct.ax, %union.anon }
%struct.ax = type { %struct.az* }
%struct.ax = type { ptr }
%union.anon = type { %struct.bd }
%struct.bd = type { i64 }
%struct.bg = type { i32, i32 }
%struct.ap = type { i32, i32 }

@ch = common global %struct.f zeroinitializer, align 8
@j = common global %struct.az* null, align 8
@j = common global ptr null, align 8
@ck = common global i32 0, align 4
@h = common global i32 0, align 4
@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1

define internal void @probestart() {
entry:
%0 = load %struct.az*, %struct.az** @j, align 8
%bw = getelementptr inbounds %struct.az, %struct.az* %0, i64 0, i32 1
%1 = load i32, i32* @h, align 4
%cond = icmp eq i32 %1, 0
%load0 = load ptr, ptr @j, align 8
%bw = getelementptr inbounds %struct.az, ptr %load0, i64 0, i32 1
%load1 = load i32, ptr @h, align 4
%cond = icmp eq i32 %load1, 0
br i1 %cond, label %sw.bb, label %cl

sw.bb: ; preds = %entry
%call = tail call inreg { i64, i64 } @ba(i32* bitcast (%struct.f* @ch to i32*))
%call = tail call inreg { i64, i64 } @ba(ptr @ch)
br label %cl

cl: ; preds = %sw.bb, %entry
%2 = load %struct.bt*, %struct.bt** %bw, align 8
%tobool = icmp eq %struct.bt* %2, null
%3 = load i32, i32* @ck, align 4
%.sink5 = select i1 %tobool, i32* getelementptr (%struct.bg, %struct.bg* bitcast (%union.anon* getelementptr inbounds (%struct.f, %struct.f* @ch, i64 0, i32 1) to %struct.bg*), i64 0, i32 1), i32* getelementptr (%struct.ap, %struct.ap* bitcast (%union.anon* getelementptr inbounds (%struct.f, %struct.f* @ch, i64 0, i32 1) to %struct.ap*), i64 0, i32 1)
store i32 %3, i32* %.sink5, align 4
store i32 1, i32* bitcast (i64* getelementptr inbounds (%struct.f, %struct.f* @ch, i64 0, i32 1, i32 0, i32 0) to i32*), align 8
%4 = load %struct.bt*, %struct.bt** %bw, align 8
tail call void (i8*, ...) @a(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0), %struct.bt* %4)
%load2 = load ptr, ptr %bw, align 8
%tobool = icmp eq ptr %load2, null
%load3 = load i32, ptr @ck, align 4
%.sink5 = select i1 %tobool, ptr getelementptr (%struct.bg, ptr getelementptr inbounds (%struct.f, ptr @ch, i64 0, i32 1), i64 0, i32 1), ptr getelementptr (%struct.ap, ptr getelementptr inbounds (%struct.f, ptr @ch, i64 0, i32 1), i64 0, i32 1)
store i32 %load3, ptr %.sink5, align 4
store i32 1, ptr getelementptr inbounds (%struct.f, ptr @ch, i64 0, i32 1, i32 0, i32 0), align 8
%load4 = load ptr, ptr %bw, align 8
tail call void (ptr, ...) @a(ptr @.str, ptr %load4)
ret void
}

; CHECK-LABEL: @probestart()
; CHECK-LABEL: entry:
; CHECK: %[[I0:[0-9]+]] = load %struct.az*, %struct.az** @j
; CHECK: %[[I0:[a-z0-9]+]] = load ptr, ptr @j
; CHECK-LABEL: cl:

; CHECK-NOT: %{{[0-9]+}} = load %struct.bt*, %struct.bt** %bw
; CHECK-NOT: %{{[a-z0-9]+}} = load ptr, ptr %bw
; CHECK-NOT: %{{[.a-z0-9]}} = select
; CHECK-NOT: %{{[0-9]+}} = load %struct.bt*, %struct.bt** %bw
; CHECK-NOT: %{{[a-z0-9]+}} = load ptr, ptr %bw

; CHECK: %[[I1:[0-9]+]] = bitcast %struct.az* %[[I0]] to i8*
; CHECK-NEXT: %sunkaddr = getelementptr inbounds i8, i8* %[[I1]], i64 8
; CHECK-NEXT: %[[I2:[0-9]+]] = bitcast i8* %sunkaddr to %struct.bt**
; CHECK-NEXT: %{{[0-9]+}} = load %struct.bt*, %struct.bt** %[[I2]]
; CHECK-NEXT: tail call void (i8*, ...) @a
; CHECK: %sunkaddr = getelementptr inbounds i8, ptr %[[I0]], i64 8
; CHECK-NEXT: %{{[a-z0-9]+}} = load ptr, ptr %sunkaddr
; CHECK-NEXT: tail call void (ptr, ...) @a

declare inreg { i64, i64 } @ba(i32*)
declare inreg { i64, i64 } @ba(ptr)

declare void @a(i8*, ...)
declare void @a(ptr, ...)
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ target triple = "nvptx64-nvidia-cuda"
; numerator is a multiple of the denominator).
;
; CHECK-LABEL: @test
define void @test(i64 %a, i64 %b, i64* %retptr) {
define void @test(i64 %a, i64 %b, ptr %retptr) {
; CHECK: udiv i32
%d = sdiv i64 %a, %b
store i64 %d, i64* %retptr
store i64 %d, ptr %retptr
ret void
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,25 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

; No bypassing should be done in apparently unsuitable cases.
define void @Test_no_bypassing(i32 %a, i64 %b, i64* %retptr) {
define void @Test_no_bypassing(i32 %a, i64 %b, ptr %retptr) {
; CHECK-LABEL: @Test_no_bypassing(
; CHECK-NEXT: [[A_1:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[A_2:%.*]] = sub i64 -1, [[A_1]]
; CHECK-NEXT: [[RES:%.*]] = srem i64 [[A_2]], [[B:%.*]]
; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
; CHECK-NEXT: store i64 [[RES]], ptr [[RETPTR:%.*]]
; CHECK-NEXT: ret void
;
%a.1 = zext i32 %a to i64
; %a.2 is always negative so the division cannot be bypassed.
%a.2 = sub i64 -1, %a.1
%res = srem i64 %a.2, %b
store i64 %res, i64* %retptr
store i64 %res, ptr %retptr
ret void
}

; No OR instruction is needed if one of the operands (divisor) is known
; to fit into 32 bits.
define void @Test_check_one_operand(i64 %a, i32 %b, i64* %retptr) {
define void @Test_check_one_operand(i64 %a, i32 %b, ptr %retptr) {
; CHECK-LABEL: @Test_check_one_operand(
; CHECK-NEXT: [[B_1:%.*]] = zext i32 [[B:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[A:%.*]], -4294967296
Expand All @@ -37,38 +37,38 @@ define void @Test_check_one_operand(i64 %a, i32 %b, i64* %retptr) {
; CHECK: [[TMP9:%.*]] = sdiv i64 [[A]], [[B_1]]
; CHECK-NEXT: br label [[TMP10]]
; CHECK: [[TMP11:%.*]] = phi i64 [ [[TMP7]], [[TMP3]] ], [ [[TMP9]], [[TMP8]] ]
; CHECK-NEXT: store i64 [[TMP11]], i64* [[RETPTR:%.*]]
; CHECK-NEXT: store i64 [[TMP11]], ptr [[RETPTR:%.*]]
; CHECK-NEXT: ret void
;
%b.1 = zext i32 %b to i64
%res = sdiv i64 %a, %b.1
store i64 %res, i64* %retptr
store i64 %res, ptr %retptr
ret void
}

; If both operands are known to fit into 32 bits, then replace the division
; in-place without CFG modification.
define void @Test_check_none(i64 %a, i32 %b, i64* %retptr) {
define void @Test_check_none(i64 %a, i32 %b, ptr %retptr) {
; CHECK-LABEL: @Test_check_none(
; CHECK-NEXT: [[A_1:%.*]] = and i64 [[A:%.*]], 4294967295
; CHECK-NEXT: [[B_1:%.*]] = zext i32 [[B:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A_1]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[B_1]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = udiv i32 [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT: store i64 [[TMP4]], i64* [[RETPTR:%.*]]
; CHECK-NEXT: store i64 [[TMP4]], ptr [[RETPTR:%.*]]
; CHECK-NEXT: ret void
;
%a.1 = and i64 %a, 4294967295
%b.1 = zext i32 %b to i64
%res = udiv i64 %a.1, %b.1
store i64 %res, i64* %retptr
store i64 %res, ptr %retptr
ret void
}

; In case of unsigned long division with a short dividend,
; the long division is not needed any more.
define void @Test_special_case(i32 %a, i64 %b, i64* %retptr) {
define void @Test_special_case(i32 %a, i64 %b, ptr %retptr) {
; CHECK-LABEL: @Test_special_case(
; CHECK-NEXT: [[A_1:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i64 [[A_1]], [[B:%.*]]
Expand All @@ -83,33 +83,33 @@ define void @Test_special_case(i32 %a, i64 %b, i64* %retptr) {
; CHECK: [[TMP10:%.*]] = phi i64 [ [[TMP7]], [[TMP2]] ], [ 0, [[TMP0:%.*]] ]
; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP8]], [[TMP2]] ], [ [[A_1]], [[TMP0]] ]
; CHECK-NEXT: [[RES:%.*]] = add i64 [[TMP10]], [[TMP11]]
; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
; CHECK-NEXT: store i64 [[RES]], ptr [[RETPTR:%.*]]
; CHECK-NEXT: ret void
;
%a.1 = zext i32 %a to i64
%div = udiv i64 %a.1, %b
%rem = urem i64 %a.1, %b
%res = add i64 %div, %rem
store i64 %res, i64* %retptr
store i64 %res, ptr %retptr
ret void
}


; Do not bypass a division if one of the operands looks like a hash value.
define void @Test_dont_bypass_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) {
define void @Test_dont_bypass_xor(i64 %a, i64 %b, i64 %l, ptr %retptr) {
; CHECK-LABEL: @Test_dont_bypass_xor(
; CHECK-NEXT: [[C:%.*]] = xor i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[RES:%.*]] = udiv i64 [[C]], [[L:%.*]]
; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
; CHECK-NEXT: store i64 [[RES]], ptr [[RETPTR:%.*]]
; CHECK-NEXT: ret void
;
%c = xor i64 %a, %b
%res = udiv i64 %c, %l
store i64 %res, i64* %retptr
store i64 %res, ptr %retptr
ret void
}

define void @Test_dont_bypass_phi_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) {
define void @Test_dont_bypass_phi_xor(i64 %a, i64 %b, i64 %l, ptr %retptr) {
; CHECK-LABEL: @Test_dont_bypass_phi_xor(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[B:%.*]], 0
Expand All @@ -120,7 +120,7 @@ define void @Test_dont_bypass_phi_xor(i64 %a, i64 %b, i64 %l, i64* %retptr) {
; CHECK: merge:
; CHECK-NEXT: [[E:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[C]], [[XORPATH]] ]
; CHECK-NEXT: [[RES:%.*]] = sdiv i64 [[E]], [[L:%.*]]
; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
; CHECK-NEXT: store i64 [[RES]], ptr [[RETPTR:%.*]]
; CHECK-NEXT: ret void
;
entry:
Expand All @@ -134,24 +134,24 @@ xorpath:
merge:
%e = phi i64 [ undef, %entry ], [ %c, %xorpath ]
%res = sdiv i64 %e, %l
store i64 %res, i64* %retptr
store i64 %res, ptr %retptr
ret void
}

define void @Test_dont_bypass_mul_long_const(i64 %a, i64 %l, i64* %retptr) {
define void @Test_dont_bypass_mul_long_const(i64 %a, i64 %l, ptr %retptr) {
; CHECK-LABEL: @Test_dont_bypass_mul_long_const(
; CHECK-NEXT: [[C:%.*]] = mul i64 [[A:%.*]], 5229553307
; CHECK-NEXT: [[RES:%.*]] = urem i64 [[C]], [[L:%.*]]
; CHECK-NEXT: store i64 [[RES]], i64* [[RETPTR:%.*]]
; CHECK-NEXT: store i64 [[RES]], ptr [[RETPTR:%.*]]
; CHECK-NEXT: ret void
;
%c = mul i64 %a, 5229553307 ; the constant doesn't fit 32 bits
%res = urem i64 %c, %l
store i64 %res, i64* %retptr
store i64 %res, ptr %retptr
ret void
}

define void @Test_bypass_phi_mul_const(i64 %a, i64 %b, i64* %retptr) {
define void @Test_bypass_phi_mul_const(i64 %a, i64 %b, ptr %retptr) {
; CHECK-LABEL: @Test_bypass_phi_mul_const(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_MUL:%.*]] = mul nsw i64 [[A:%.*]], 34806414968801
Expand All @@ -173,7 +173,7 @@ define void @Test_bypass_phi_mul_const(i64 %a, i64 %b, i64* %retptr) {
; CHECK: [[TMP9:%.*]] = sdiv i64 [[LHS]], [[B]]
; CHECK-NEXT: br label [[TMP10]]
; CHECK: [[TMP11:%.*]] = phi i64 [ [[TMP7]], [[TMP3]] ], [ [[TMP9]], [[TMP8]] ]
; CHECK-NEXT: store i64 [[TMP11]], i64* [[RETPTR:%.*]]
; CHECK-NEXT: store i64 [[TMP11]], ptr [[RETPTR:%.*]]
; CHECK-NEXT: ret void
;
entry:
Expand All @@ -187,11 +187,11 @@ branch:
merge:
%lhs = phi i64 [ 42, %branch ], [ %a.mul, %entry ]
%res = sdiv i64 %lhs, %b
store i64 %res, i64* %retptr
store i64 %res, ptr %retptr
ret void
}

define void @Test_bypass_mul_short_const(i64 %a, i64 %l, i64* %retptr) {
define void @Test_bypass_mul_short_const(i64 %a, i64 %l, ptr %retptr) {
; CHECK-LABEL: @Test_bypass_mul_short_const(
; CHECK-NEXT: [[C:%.*]] = mul i64 [[A:%.*]], -42
; CHECK-NEXT: [[TMP1:%.*]] = or i64 [[C]], [[L:%.*]]
Expand All @@ -206,11 +206,11 @@ define void @Test_bypass_mul_short_const(i64 %a, i64 %l, i64* %retptr) {
; CHECK: [[TMP10:%.*]] = urem i64 [[C]], [[L]]
; CHECK-NEXT: br label [[TMP11]]
; CHECK: [[TMP12:%.*]] = phi i64 [ [[TMP8]], [[TMP4]] ], [ [[TMP10]], [[TMP9]] ]
; CHECK-NEXT: store i64 [[TMP12]], i64* [[RETPTR:%.*]]
; CHECK-NEXT: store i64 [[TMP12]], ptr [[RETPTR:%.*]]
; CHECK-NEXT: ret void
;
%c = mul i64 %a, -42
%res = urem i64 %c, %l
store i64 %res, i64* %retptr
store i64 %res, ptr %retptr
ret void
}
8 changes: 4 additions & 4 deletions llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,26 @@ target triple = "nvptx64-nvidia-cuda"

; We only use the div instruction -- the rem should be DCE'ed.
; CHECK-LABEL: @div_only
define void @div_only(i64 %a, i64 %b, i64* %retptr) {
define void @div_only(i64 %a, i64 %b, ptr %retptr) {
; CHECK: udiv i32
; CHECK-NOT: urem
; CHECK: sdiv i64
; CHECK-NOT: rem
%d = sdiv i64 %a, %b
store i64 %d, i64* %retptr
store i64 %d, ptr %retptr
ret void
}

; We only use the rem instruction -- the div should be DCE'ed.
; CHECK-LABEL: @rem_only
define void @rem_only(i64 %a, i64 %b, i64* %retptr) {
define void @rem_only(i64 %a, i64 %b, ptr %retptr) {
; CHECK-NOT: div
; CHECK: urem i32
; CHECK-NOT: div
; CHECK: rem i64
; CHECK-NOT: div
%d = srem i64 %a, %b
store i64 %d, i64* %retptr
store i64 %d, ptr %retptr
ret void
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,36 +8,35 @@ target triple = "nvptx64-nvidia-cuda"
; which we can't sink into an addrspacecast

; CHECK-LABEL: @test
define void @test(i8* %input_ptr) {
define void @test(ptr %input_ptr) {
; CHECK-LABEL: l1:
; CHECK-NOT: addrspacecast
%intptr = ptrtoint i8* %input_ptr to i64
%ptr = inttoptr i64 %intptr to i32 addrspace(3)*
%intptr = ptrtoint ptr %input_ptr to i64
%ptr = inttoptr i64 %intptr to ptr addrspace(3)

br label %l1
l1:

store atomic i32 1, i32 addrspace(3)* %ptr unordered, align 4
store atomic i32 1, ptr addrspace(3) %ptr unordered, align 4
ret void
}


; We should still be able to look through multiple sequences of inttoptr/ptrtoint.

; CHECK-LABEL: @test2
define void @test2(i8* %input_ptr) {
define void @test2(ptr %input_ptr) {
; CHECK-LABEL: l2:
; CHECK: bitcast
; CHECK-NEXT: store
%intptr = ptrtoint i8* %input_ptr to i64
%ptr = inttoptr i64 %intptr to i32 addrspace(3)*
%intptr = ptrtoint ptr %input_ptr to i64
%ptr = inttoptr i64 %intptr to ptr addrspace(3)

%intptr2 = ptrtoint i32 addrspace(3)* %ptr to i64
%ptr2 = inttoptr i64 %intptr2 to i32*
%intptr2 = ptrtoint ptr addrspace(3) %ptr to i64
%ptr2 = inttoptr i64 %intptr2 to ptr

br label %l2
l2:

store atomic i32 1, i32* %ptr2 unordered, align 4
store atomic i32 1, ptr %ptr2 unordered, align 4
ret void
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@ target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

; CHECK-LABEL: @test
define i64 @test(i1 %pred, i64* %ptr) {
define i64 @test(i1 %pred, ptr %ptr) {
; CHECK: addrspacecast
%ptr_as1 = addrspacecast i64* %ptr to i64 addrspace(1)*
%ptr_as1 = addrspacecast ptr %ptr to ptr addrspace(1)
br i1 %pred, label %l1, label %l2
l1:
; CHECK-LABEL: l1:
; CHECK-NOT: addrspacecast
%v1 = load i64, i64* %ptr
%v1 = load i64, ptr %ptr
ret i64 %v1
l2:
; CHECK-LABEL: l2:
; CHECK-NOT: addrspacecast
%v2 = load i64, i64 addrspace(1)* %ptr_as1
%v2 = load i64, ptr addrspace(1) %ptr_as1
ret i64 %v2
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,16 @@
; RUN: opt -S -codegenprepare -mtriple=powerpc64-unknown-linux-gnu -data-layout="E-m:e-i64:64-n32:64" -force-split-store < %s | FileCheck --check-prefix=BE %s
; RUN: opt -S -codegenprepare -mtriple=powerpc64le-unknown-linux-gnu -data-layout="e-m:e-i64:64-n32:64" -force-split-store < %s | FileCheck --check-prefix=LE %s

define void @split_store_align1(float %x, i64* %p) {
define void @split_store_align1(float %x, ptr %p) {
; BE-LABEL: @split_store_align1(
; BE-NEXT: [[B:%.*]] = bitcast float [[X:%.*]] to i32
; BE-NEXT: [[Z:%.*]] = zext i32 0 to i64
; BE-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
; BE-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
; BE-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
; BE-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
; BE-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 1
; BE-NEXT: store i32 [[B]], i32* [[TMP2]], align 1
; BE-NEXT: [[TMP3:%.*]] = bitcast i64* [[P]] to i32*
; BE-NEXT: store i32 0, i32* [[TMP3]], align 1
; BE-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
; BE-NEXT: store i32 [[B]], ptr [[TMP2]], align 1
; BE-NEXT: store i32 0, ptr [[P]], align 1
; BE-NEXT: ret void
;
; LE-LABEL: @split_store_align1(
Expand All @@ -22,34 +20,30 @@ define void @split_store_align1(float %x, i64* %p) {
; LE-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
; LE-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
; LE-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
; LE-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
; LE-NEXT: store i32 [[B]], i32* [[TMP1]], align 1
; LE-NEXT: [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
; LE-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
; LE-NEXT: store i32 0, i32* [[TMP3]], align 1
; LE-NEXT: store i32 [[B]], ptr [[P:%.*]], align 1
; LE-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P]], i32 1
; LE-NEXT: store i32 0, ptr [[TMP3]], align 1
; LE-NEXT: ret void
;
%b = bitcast float %x to i32
%z = zext i32 0 to i64
%s = shl nuw nsw i64 %z, 32
%z2 = zext i32 %b to i64
%o = or i64 %s, %z2
store i64 %o, i64* %p, align 1
store i64 %o, ptr %p, align 1
ret void
}

define void @split_store_align2(float %x, i64* %p) {
define void @split_store_align2(float %x, ptr %p) {
; BE-LABEL: @split_store_align2(
; BE-NEXT: [[B:%.*]] = bitcast float [[X:%.*]] to i32
; BE-NEXT: [[Z:%.*]] = zext i32 0 to i64
; BE-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
; BE-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
; BE-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
; BE-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
; BE-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 1
; BE-NEXT: store i32 [[B]], i32* [[TMP2]], align 2
; BE-NEXT: [[TMP3:%.*]] = bitcast i64* [[P]] to i32*
; BE-NEXT: store i32 0, i32* [[TMP3]], align 2
; BE-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
; BE-NEXT: store i32 [[B]], ptr [[TMP2]], align 2
; BE-NEXT: store i32 0, ptr [[P]], align 2
; BE-NEXT: ret void
;
; LE-LABEL: @split_store_align2(
Expand All @@ -58,34 +52,30 @@ define void @split_store_align2(float %x, i64* %p) {
; LE-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
; LE-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
; LE-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
; LE-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
; LE-NEXT: store i32 [[B]], i32* [[TMP1]], align 2
; LE-NEXT: [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
; LE-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
; LE-NEXT: store i32 0, i32* [[TMP3]], align 2
; LE-NEXT: store i32 [[B]], ptr [[P:%.*]], align 2
; LE-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P]], i32 1
; LE-NEXT: store i32 0, ptr [[TMP3]], align 2
; LE-NEXT: ret void
;
%b = bitcast float %x to i32
%z = zext i32 0 to i64
%s = shl nuw nsw i64 %z, 32
%z2 = zext i32 %b to i64
%o = or i64 %s, %z2
store i64 %o, i64* %p, align 2
store i64 %o, ptr %p, align 2
ret void
}

define void @split_store_align8(float %x, i64* %p) {
define void @split_store_align8(float %x, ptr %p) {
; BE-LABEL: @split_store_align8(
; BE-NEXT: [[B:%.*]] = bitcast float [[X:%.*]] to i32
; BE-NEXT: [[Z:%.*]] = zext i32 0 to i64
; BE-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
; BE-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
; BE-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
; BE-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
; BE-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 1
; BE-NEXT: store i32 [[B]], i32* [[TMP2]], align 4
; BE-NEXT: [[TMP3:%.*]] = bitcast i64* [[P]] to i32*
; BE-NEXT: store i32 0, i32* [[TMP3]], align 8
; BE-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 1
; BE-NEXT: store i32 [[B]], ptr [[TMP2]], align 4
; BE-NEXT: store i32 0, ptr [[P]], align 8
; BE-NEXT: ret void
;
; LE-LABEL: @split_store_align8(
Expand All @@ -94,18 +84,16 @@ define void @split_store_align8(float %x, i64* %p) {
; LE-NEXT: [[S:%.*]] = shl nuw nsw i64 [[Z]], 32
; LE-NEXT: [[Z2:%.*]] = zext i32 [[B]] to i64
; LE-NEXT: [[O:%.*]] = or i64 [[S]], [[Z2]]
; LE-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to i32*
; LE-NEXT: store i32 [[B]], i32* [[TMP1]], align 8
; LE-NEXT: [[TMP2:%.*]] = bitcast i64* [[P]] to i32*
; LE-NEXT: [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 1
; LE-NEXT: store i32 0, i32* [[TMP3]], align 4
; LE-NEXT: store i32 [[B]], ptr [[P:%.*]], align 8
; LE-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P]], i32 1
; LE-NEXT: store i32 0, ptr [[TMP3]], align 4
; LE-NEXT: ret void
;
%b = bitcast float %x to i32
%z = zext i32 0 to i64
%s = shl nuw nsw i64 %z, 32
%z2 = zext i32 %b to i64
%o = or i64 %s, %z2
store i64 %o, i64* %p, align 8
store i64 %o, ptr %p, align 8
ret void
}
14 changes: 7 additions & 7 deletions llvm/test/Transforms/CodeGenPrepare/RISCV/and-mask-sink.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ define i32 @and_sink1(i32 %a, i1 %c) {
; NOZBS-NEXT: br label [[BB0:%.*]]
; NOZBS: bb0:
; NOZBS-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; NOZBS-NEXT: store i32 0, i32* @A, align 4
; NOZBS-NEXT: store i32 0, ptr @A, align 4
; NOZBS-NEXT: br i1 [[CMP]], label [[BB0]], label [[BB2:%.*]]
; NOZBS: bb2:
; NOZBS-NEXT: ret i32 0
Expand All @@ -28,7 +28,7 @@ define i32 @and_sink1(i32 %a, i1 %c) {
; ZBS: bb0:
; ZBS-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 2048
; ZBS-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0
; ZBS-NEXT: store i32 0, i32* @A, align 4
; ZBS-NEXT: store i32 0, ptr @A, align 4
; ZBS-NEXT: br i1 [[CMP]], label [[BB0]], label [[BB2:%.*]]
; ZBS: bb2:
; ZBS-NEXT: ret i32 0
Expand All @@ -37,7 +37,7 @@ define i32 @and_sink1(i32 %a, i1 %c) {
br label %bb0
bb0:
%cmp = icmp eq i32 %and, 0
store i32 0, i32* @A
store i32 0, ptr @A
br i1 %cmp, label %bb0, label %bb2
bb2:
ret i32 0
Expand All @@ -50,7 +50,7 @@ define i32 @and_sink2(i32 %a) {
; CHECK-NEXT: br label [[BB0:%.*]]
; CHECK: bb0:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: store i32 0, i32* @A, align 4
; CHECK-NEXT: store i32 0, ptr @A, align 4
; CHECK-NEXT: br i1 [[CMP]], label [[BB0]], label [[BB2:%.*]]
; CHECK: bb2:
; CHECK-NEXT: ret i32 0
Expand All @@ -59,7 +59,7 @@ define i32 @and_sink2(i32 %a) {
br label %bb0
bb0:
%cmp = icmp eq i32 %and, 0
store i32 0, i32* @A
store i32 0, ptr @A
br i1 %cmp, label %bb0, label %bb2
bb2:
ret i32 0
Expand All @@ -72,7 +72,7 @@ define i32 @and_sink3(i32 %a) {
; CHECK-NEXT: br label [[BB0:%.*]]
; CHECK: bb0:
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
; CHECK-NEXT: store i32 0, i32* @A, align 4
; CHECK-NEXT: store i32 0, ptr @A, align 4
; CHECK-NEXT: br i1 [[CMP]], label [[BB0]], label [[BB2:%.*]]
; CHECK: bb2:
; CHECK-NEXT: ret i32 0
Expand All @@ -81,7 +81,7 @@ define i32 @and_sink3(i32 %a) {
br label %bb0
bb0:
%cmp = icmp eq i32 %and, 0
store i32 0, i32* @A
store i32 0, ptr @A
br i1 %cmp, label %bb0, label %bb2
bb2:
ret i32 0
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/Transforms/CodeGenPrepare/SPARC/overflow-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,19 @@ define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
ret i64 %Q
}

define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, i64* %res) nounwind ssp {
define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-LABEL: @uaddo1_math_overflow_used(
; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: store i64 [[MATH]], i64* [[RES:%.*]]
; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
; CHECK-NEXT: ret i64 [[Q]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %a
%Q = select i1 %cmp, i64 %b, i64 42
store i64 %add, i64* %res
store i64 %add, ptr %res
ret i64 %Q
}

Expand All @@ -50,19 +50,19 @@ define i64 @uaddo2_overflow_used(i64 %a, i64 %b) nounwind ssp {
ret i64 %Q
}

define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, i64* %res) nounwind ssp {
define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-LABEL: @uaddo2_math_overflow_used(
; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: store i64 [[MATH]], i64* [[RES:%.*]]
; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
; CHECK-NEXT: ret i64 [[Q]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %b
%Q = select i1 %cmp, i64 %b, i64 42
store i64 %add, i64* %res
store i64 %add, ptr %res
ret i64 %Q
}

Expand All @@ -79,23 +79,23 @@ define i64 @uaddo3_overflow_used(i64 %a, i64 %b) nounwind ssp {
ret i64 %Q
}

define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, i64* %res) nounwind ssp {
define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-LABEL: @uaddo3_math_overflow_used(
; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: store i64 [[MATH]], i64* [[RES:%.*]]
; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
; CHECK-NEXT: ret i64 [[Q]]
;
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
%Q = select i1 %cmp, i64 %b, i64 42
store i64 %add, i64* %res
store i64 %add, ptr %res
ret i64 %Q
}

define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, i64* %p) {
define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
; CHECK-LABEL: @usubo_ult_i64_overflow_used(
; CHECK-NEXT: [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[X]], [[Y]]
Expand All @@ -106,15 +106,15 @@ define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, i64* %p) {
ret i1 %ov
}

define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, i64* %p) {
define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) {
; CHECK-LABEL: @usubo_ult_i64_math_overflow_used(
; CHECK-NEXT: [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: store i64 [[S]], i64* [[P:%.*]]
; CHECK-NEXT: store i64 [[S]], ptr [[P:%.*]]
; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[X]], [[Y]]
; CHECK-NEXT: ret i1 [[OV]]
;
%s = sub i64 %x, %y
store i64 %s, i64* %p
store i64 %s, ptr %p
%ov = icmp ult i64 %x, %y
ret i1 %ov
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"

define fastcc i32 @ascii2flt(i8* %str) nounwind {
define fastcc i32 @ascii2flt(ptr %str) nounwind {
entry:
br label %bb2.i

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/CodeGenPrepare/X86/bitreverse-hang.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
; CHECK: define i32 @fn1
define i32 @fn1() #0 {
entry:
%b.promoted = load i32, i32* @b, align 4, !tbaa !2
%b.promoted = load i32, ptr @b, align 4, !tbaa !2
br label %for.body

for.body: ; preds = %for.body, %entry
Expand All @@ -36,7 +36,7 @@ for.body: ; preds = %for.body, %entry
br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
store i32 %or, i32* @b, align 4, !tbaa !2
store i32 %or, ptr @b, align 4, !tbaa !2
ret i32 undef
}

Expand Down
30 changes: 13 additions & 17 deletions llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,24 @@ declare i32 @__CxxFrameHandler3(...)

declare void @f()

declare void @g(i8*)
declare void @g(ptr)
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #2

; CodeGenPrepare will want to sink these bitcasts, but it selects the catchpad
; blocks as the place to which the bitcast should be sunk. Since catchpads
; do not allow non-phi instructions before the terminator, this isn't possible.

; CHECK-LABEL: @test(
define void @test(i32* %addr) personality i32 (...)* @__CxxFrameHandler3 {
define void @test(ptr %addr) personality ptr @__CxxFrameHandler3 {
entry:
%x = getelementptr i32, i32* %addr, i32 1
%p1 = bitcast i32* %x to i8*
%x = getelementptr i32, ptr %addr, i32 1
invoke void @f()
to label %invoke.cont unwind label %catch1

; CHECK: invoke.cont:
; CHECK-NEXT: %y = getelementptr i32, i32* %addr, i32 2
; CHECK-NEXT: %y = getelementptr i32, ptr %addr, i32 2
invoke.cont:
%y = getelementptr i32, i32* %addr, i32 2
%p2 = bitcast i32* %y to i8*
%y = getelementptr i32, ptr %addr, i32 2
invoke void @f()
to label %done unwind label %catch2

Expand All @@ -43,7 +41,6 @@ handler1:
br label %catch.shared
; CHECK: handler1:
; CHECK-NEXT: catchpad within %cs1
; CHECK: %[[p1:[0-9]+]] = bitcast i32* %x to i8*

catch2:
%cs2 = catchswitch within none [label %handler2] unwind to caller
Expand All @@ -53,21 +50,20 @@ handler2:
br label %catch.shared
; CHECK: handler2:
; CHECK: catchpad within %cs2
; CHECK: %[[p2:[0-9]+]] = bitcast i32* %y to i8*

; CHECK: catch.shared:
; CHECK-NEXT: %p = phi i8* [ %[[p1]], %handler1 ], [ %[[p2]], %handler2 ]
; CHECK-NEXT: %p = phi ptr [ %x, %handler1 ], [ %y, %handler2 ]
catch.shared:
%p = phi i8* [ %p1, %handler1 ], [ %p2, %handler2 ]
call void @g(i8* %p)
%p = phi ptr [ %x, %handler1 ], [ %y, %handler2 ]
call void @g(ptr %p)
unreachable
}

; CodeGenPrepare will want to hoist these llvm.dbg.value calls to the phi, but
; there is no insertion point in a catchpad block.

; CHECK-LABEL: @test_dbg_value(
define void @test_dbg_value() personality i32 (...)* @__CxxFrameHandler3 {
define void @test_dbg_value() personality ptr @__CxxFrameHandler3 {
entry:
%a = alloca i8
%b = alloca i8
Expand All @@ -78,17 +74,17 @@ ret:
ret void

catch.dispatch:
%p = phi i8* [%a, %entry], [%b, %next]
%p = phi ptr [%a, %entry], [%b, %next]
%cs1 = catchswitch within none [label %catch] unwind to caller

catch:
%cp1 = catchpad within %cs1 []
tail call void @llvm.dbg.value(metadata i8* %p, i64 0, metadata !11, metadata !13), !dbg !14
call void @g(i8* %p)
tail call void @llvm.dbg.value(metadata ptr %p, i64 0, metadata !11, metadata !13), !dbg !14
call void @g(ptr %p)
catchret from %cp1 to label %ret

; CHECK: catch.dispatch:
; CHECK-NEXT: phi i8*
; CHECK-NEXT: phi ptr
; CHECK-NEXT: catchswitch
; CHECK-NOT: llvm.dbg.value

Expand Down
188 changes: 93 additions & 95 deletions llvm/test/Transforms/CodeGenPrepare/X86/computedgoto.ll

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,22 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -codegenprepare -S -mtriple=x86_64-linux < %s | FileCheck %s

define i32 @test1(i8* %d) nounwind {
define i32 @test1(ptr %d) nounwind {
; CHECK-LABEL: @test1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L:%.*]] = load i8, i8* [[D:%.*]], align 1
; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[D:%.*]], align 1
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i8 [[L]], 0
; CHECK-NEXT: [[CONV:%.*]] = zext i1 [[TMP0]] to i32
; CHECK-NEXT: ret i32 [[CONV]]
;
entry:
%l = load i8, i8* %d
%l = load i8, ptr %d
%cmp = icmp eq i8 %l, 0
br i1 %cmp, label %exit, label %if.end

if.end:
%gep = getelementptr i8, i8* %d, i32 42
%call = call i64 @foo(i8* %gep) nounwind readonly willreturn
%gep = getelementptr i8, ptr %d, i32 42
%call = call i64 @foo(ptr %gep) nounwind readonly willreturn
%cmp2 = icmp ne i64 %call, 0
call void @llvm.assume(i1 %cmp2)
br label %exit
Expand All @@ -26,5 +26,5 @@ exit:
ret i32 %conv
}

declare i64 @foo(i8*) nounwind readonly willreturn
declare i64 @foo(ptr) nounwind readonly willreturn
declare void @llvm.assume(i1 noundef) nounwind willreturn
30 changes: 15 additions & 15 deletions llvm/test/Transforms/CodeGenPrepare/X86/ext-logicop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,22 @@
declare void @foo()

; ext(and(ld, cst)) -> and(ext(ld), ext(cst))
define void @test1(i32* %p, i32 %ll) {
define void @test1(ptr %p, i32 %ll) {
; CHECK-LABEL: @test1
; CHECK-NEXT: entry:
; CHECK-NEXT: load
; CHECK-NEXT: zext
; CHECK-NEXT: and
entry:
%tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
%tmp = load i8, ptr @a, align 1
%and = and i8 %tmp, 60
%cmp = icmp ugt i8 %and, 20
br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
%conv2 = zext i8 %and to i32
%add = add nsw i32 %conv2, %ll
store i32 %add, i32* %p, align 4
store i32 %add, ptr %p, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
Expand All @@ -29,22 +29,22 @@ if.end: ; preds = %if.then, %entry
}

; ext(or(ld, cst)) -> or(ext(ld), ext(cst))
define void @test2(i32* %p, i32 %ll) {
define void @test2(ptr %p, i32 %ll) {
; CHECK-LABEL: @test2
; CHECK-NEXT: entry:
; CHECK-NEXT: load
; CHECK-NEXT: zext
; CHECK-NEXT: or
entry:
%tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
%tmp = load i8, ptr @a, align 1
%or = or i8 %tmp, 60
%cmp = icmp ugt i8 %or, 20
br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
%conv2 = zext i8 %or to i32
%add = add nsw i32 %conv2, %ll
store i32 %add, i32* %p, align 4
store i32 %add, ptr %p, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
Expand All @@ -53,15 +53,15 @@ if.end: ; preds = %if.then, %entry
}

; ext(and(shl(ld, cst), cst)) -> and(shl(ext(ld), ext(cst)), ext(cst))
define void @test3(i32* %p, i32 %ll) {
define void @test3(ptr %p, i32 %ll) {
; CHECK-LABEL: @test3
; CHECK-NEXT: entry:
; CHECK-NEXT: load
; CHECK-NEXT: zext
; CHECK-NEXT: shl
; CHECK-NEXT: and
entry:
%tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
%tmp = load i8, ptr @a, align 1
%shl = shl i8 %tmp, 2
%and = and i8 %shl, 60
%cmp = icmp ugt i8 %and, 20
Expand All @@ -70,7 +70,7 @@ entry:
if.then: ; preds = %entry
%conv2 = zext i8 %and to i32
%add = add nsw i32 %conv2, %ll
store i32 %add, i32* %p, align 4
store i32 %add, ptr %p, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
Expand All @@ -79,22 +79,22 @@ if.end: ; preds = %if.then, %entry
}

; zext(shrl(ld, cst)) -> shrl(zext(ld), zext(cst))
define void @test4(i32* %p, i32 %ll) {
define void @test4(ptr %p, i32 %ll) {
; CHECK-LABEL: @test4
; CHECK-NEXT: entry:
; CHECK-NEXT: load
; CHECK-NEXT: zext
; CHECK-NEXT: lshr
entry:
%tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
%tmp = load i8, ptr @a, align 1
%lshr = lshr i8 %tmp, 2
%cmp = icmp ugt i8 %lshr, 20
br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
%conv2 = zext i8 %lshr to i32
%add = add nsw i32 %conv2, %ll
store i32 %add, i32* %p, align 4
store i32 %add, ptr %p, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
Expand All @@ -103,22 +103,22 @@ if.end: ; preds = %if.then, %entry
}

; ext(xor(ld, cst)) -> xor(ext(ld), ext(cst))
define void @test5(i32* %p, i32 %ll) {
define void @test5(ptr %p, i32 %ll) {
; CHECK-LABEL: @test5
; CHECK-NEXT: entry:
; CHECK-NEXT: load
; CHECK-NEXT: zext
; CHECK-NEXT: xor
entry:
%tmp = load i8, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @a, i64 0, i64 0), align 1
%tmp = load i8, ptr @a, align 1
%xor = xor i8 %tmp, 60
%cmp = icmp ugt i8 %xor, 20
br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
%conv2 = zext i8 %xor to i32
%add = add nsw i32 %conv2, %ll
store i32 %add, i32* %p, align 4
store i32 %add, ptr %p, align 4
br label %if.end

if.end: ; preds = %if.then, %entry
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,40 @@ target triple = "x86_64-unknown-linux-gnu"

; The first cast should be sunk into block2, in order that the
; instruction selector can form an efficient
; i64 * i64 -> i128 multiplication.
define i128 @sink(i64* %mem1, i64* %mem2) {
; i64 * i64 -> i128 multiplication.
define i128 @sink(ptr %mem1, ptr %mem2) {
; CHECK-LABEL: block1:
; CHECK-NEXT: load
block1:
%l1 = load i64, i64* %mem1
%l1 = load i64, ptr %mem1
%s1 = sext i64 %l1 to i128
br label %block2

; CHECK-NEXT: sext
; CHECK-NEXT: load
; CHECK-NEXT: sext
block2:
%l2 = load i64, i64* %mem2
%l2 = load i64, ptr %mem2
%s2 = sext i64 %l2 to i128
%res = mul i128 %s1, %s2
ret i128 %res
}

; The first cast should be hoisted into block1, in order that the
; instruction selector can form an extend-load.
define i64 @hoist(i32* %mem1, i32* %mem2) {
define i64 @hoist(ptr %mem1, ptr %mem2) {
; CHECK-LABEL: block1:
; CHECK-NEXT: load
; CHECK-NEXT: sext
block1:
%l1 = load i32, i32* %mem1
%l1 = load i32, ptr %mem1
br label %block2

; CHECK-NEXT: load
; CHECK-NEXT: sext
block2:
%s1 = sext i32 %l1 to i64
%l2 = load i32, i32* %mem2
%l2 = load i32, ptr %mem2
%s2 = sext i32 %l2 to i64
%res = mul i64 %s1, %s2
ret i64 %res
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/CodeGenPrepare/X86/freeze-brcond.ll
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,13 @@ B:
ret void
}

define i1 @ptrcmp(i8* %p) {
define i1 @ptrcmp(ptr %p) {
; CHECK-LABEL: @ptrcmp(
; CHECK-NEXT: [[FR:%.*]] = freeze i8* [[P:%.*]]
; CHECK-NEXT: [[C:%.*]] = icmp eq i8* [[FR]], null
; CHECK-NEXT: [[FR:%.*]] = freeze ptr [[P:%.*]]
; CHECK-NEXT: [[C:%.*]] = icmp eq ptr [[FR]], null
; CHECK-NEXT: ret i1 [[C]]
;
%c = icmp eq i8* %p, null
%c = icmp eq ptr %p, null
%fr = freeze i1 %c
ret i1 %fr
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,105 +10,105 @@ target triple = "x86_64-unknown-linux-gnu"
@c = external dso_local global %struct.a, align 4
@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16

define <4 x i32> @splat_base(i32* %base, <4 x i64> %index) {
define <4 x i32> @splat_base(ptr %base, <4 x i64> %index) {
; CHECK-LABEL: @splat_base(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <4 x i64> [[INDEX:%.*]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i64> [[INDEX:%.*]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%broadcast.splatinsert = insertelement <4 x i32*> poison, i32* %base, i32 0
%broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> poison, <4 x i32> zeroinitializer
%gep = getelementptr i32, <4 x i32*> %broadcast.splat, <4 x i64> %index
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
%broadcast.splatinsert = insertelement <4 x ptr> poison, ptr %base, i32 0
%broadcast.splat = shufflevector <4 x ptr> %broadcast.splatinsert, <4 x ptr> poison, <4 x i32> zeroinitializer
%gep = getelementptr i32, <4 x ptr> %broadcast.splat, <4 x i64> %index
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}

define <4 x i32> @splat_struct(%struct.a* %base) {
define <4 x i32> @splat_struct(ptr %base) {
; CHECK-LABEL: @splat_struct(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], %struct.a* [[BASE:%.*]], i64 0, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], ptr [[BASE:%.*]], i64 0, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%gep = getelementptr %struct.a, %struct.a* %base, <4 x i64> zeroinitializer, i32 1
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
%gep = getelementptr %struct.a, ptr %base, <4 x i64> zeroinitializer, i32 1
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}

define <4 x i32> @scalar_index(i32* %base, i64 %index) {
define <4 x i32> @scalar_index(ptr %base, i64 %index) {
; CHECK-LABEL: @scalar_index(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%broadcast.splatinsert = insertelement <4 x i32*> poison, i32* %base, i32 0
%broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> poison, <4 x i32> zeroinitializer
%gep = getelementptr i32, <4 x i32*> %broadcast.splat, i64 %index
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
%broadcast.splatinsert = insertelement <4 x ptr> poison, ptr %base, i32 0
%broadcast.splat = shufflevector <4 x ptr> %broadcast.splatinsert, <4 x ptr> poison, <4 x i32> zeroinitializer
%gep = getelementptr i32, <4 x ptr> %broadcast.splat, i64 %index
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}

define <4 x i32> @splat_index(i32* %base, i64 %index) {
define <4 x i32> @splat_index(ptr %base, i64 %index) {
; CHECK-LABEL: @splat_index(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%broadcast.splatinsert = insertelement <4 x i64> poison, i64 %index, i32 0
%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
%gep = getelementptr i32, i32* %base, <4 x i64> %broadcast.splat
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
%gep = getelementptr i32, ptr %base, <4 x i64> %broadcast.splat
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}

define <4 x i32> @test_global_array(<4 x i64> %indxs) {
; CHECK-LABEL: @test_global_array(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @glob_array, i64 0, i64 0), <4 x i64> [[INDXS:%.*]]
; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr @glob_array, <4 x i64> [[INDXS:%.*]]
; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[G]]
;
%p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <4 x i64> %indxs
%g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
%p = getelementptr inbounds [16 x i32], ptr @glob_array, i64 0, <4 x i64> %indxs
%g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %g
}

define <4 x i32> @global_struct_splat() {
; CHECK-LABEL: @global_struct_splat(
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> <i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1)>, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> <ptr getelementptr inbounds (%struct.a, ptr @c, i64 0, i32 1), ptr getelementptr inbounds (%struct.a, ptr @c, i64 0, i32 1), ptr getelementptr inbounds (%struct.a, ptr @c, i64 0, i32 1), ptr getelementptr inbounds (%struct.a, ptr @c, i64 0, i32 1)>, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%1 = insertelement <4 x %struct.a*> poison, %struct.a* @c, i32 0
%2 = shufflevector <4 x %struct.a*> %1, <4 x %struct.a*> poison, <4 x i32> zeroinitializer
%3 = getelementptr %struct.a, <4 x %struct.a*> %2, <4 x i64> zeroinitializer, i32 1
%4 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
%1 = insertelement <4 x ptr> poison, ptr @c, i32 0
%2 = shufflevector <4 x ptr> %1, <4 x ptr> poison, <4 x i32> zeroinitializer
%3 = getelementptr %struct.a, <4 x ptr> %2, <4 x i64> zeroinitializer, i32 1
%4 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %4
}

define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthru) {
define <4 x i32> @splat_ptr_gather(ptr %ptr, <4 x i1> %mask, <4 x i32> %passthru) {
; CHECK-LABEL: @splat_ptr_gather(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]], <4 x i32> [[PASSTHRU:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]], <4 x i32> [[PASSTHRU:%.*]])
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%1 = insertelement <4 x i32*> poison, i32* %ptr, i32 0
%2 = shufflevector <4 x i32*> %1, <4 x i32*> poison, <4 x i32> zeroinitializer
%3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> %mask, <4 x i32> %passthru)
%1 = insertelement <4 x ptr> poison, ptr %ptr, i32 0
%2 = shufflevector <4 x ptr> %1, <4 x ptr> poison, <4 x i32> zeroinitializer
%3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> %mask, <4 x i32> %passthru)
ret <4 x i32> %3
}

define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) {
define void @splat_ptr_scatter(ptr %ptr, <4 x i1> %mask, <4 x i32> %val) {
; CHECK-LABEL: @splat_ptr_scatter(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <4 x i64> zeroinitializer
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL:%.*]], <4 x i32*> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <4 x i64> zeroinitializer
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[VAL:%.*]], <4 x ptr> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]])
; CHECK-NEXT: ret void
;
%1 = insertelement <4 x i32*> poison, i32* %ptr, i32 0
%2 = shufflevector <4 x i32*> %1, <4 x i32*> poison, <4 x i32> zeroinitializer
call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %2, i32 4, <4 x i1> %mask)
%1 = insertelement <4 x ptr> poison, ptr %ptr, i32 0
%2 = shufflevector <4 x ptr> %1, <4 x ptr> poison, <4 x i32> zeroinitializer
call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %2, i32 4, <4 x i1> %mask)
ret void
}

declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
100 changes: 50 additions & 50 deletions llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,105 +9,105 @@ target triple = "x86_64-unknown-linux-gnu"
; Globals used as gather bases by the tests below: @c is an external object of
; type %struct.a (the type definition is outside this excerpt) and @glob_array
; is a constant table of 16 i32 values.
@c = external dso_local global %struct.a, align 4
@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16

; Gather whose address is a vector GEP over a splat of scalar %base, indexed by
; the vector %index. The assertions expect the insertelement/shufflevector
; splat to disappear: the GEP takes scalar %base directly with the vector
; index, and that GEP feeds the gather.
define <4 x i32> @splat_base(i32* %base, <4 x i64> %index) {
define <4 x i32> @splat_base(ptr %base, <4 x i64> %index) {
; CHECK-LABEL: @splat_base(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <4 x i64> [[INDEX:%.*]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], <4 x i64> [[INDEX:%.*]]
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%broadcast.splatinsert = insertelement <4 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> undef, <4 x i32> zeroinitializer
%gep = getelementptr i32, <4 x i32*> %broadcast.splat, <4 x i64> %index
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
%broadcast.splatinsert = insertelement <4 x ptr> undef, ptr %base, i32 0
%broadcast.splat = shufflevector <4 x ptr> %broadcast.splatinsert, <4 x ptr> undef, <4 x i32> zeroinitializer
%gep = getelementptr i32, <4 x ptr> %broadcast.splat, <4 x i64> %index
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}

; Gather of struct field 1 addressed by a single GEP that mixes a scalar base
; with a zeroinitializer vector index. The assertions expect the address to be
; split in two: a purely scalar GEP to field 1 of %base, followed by an i32 GEP
; with a zeroinitializer <4 x i64> index that produces the pointer vector.
define <4 x i32> @splat_struct(%struct.a* %base) {
define <4 x i32> @splat_struct(ptr %base) {
; CHECK-LABEL: @splat_struct(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], %struct.a* [[BASE:%.*]], i64 0, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], ptr [[BASE:%.*]], i64 0, i32 1
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%gep = getelementptr %struct.a, %struct.a* %base, <4 x i64> zeroinitializer, i32 1
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
%gep = getelementptr %struct.a, ptr %base, <4 x i64> zeroinitializer, i32 1
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}

; Gather whose address is a GEP over a splat of %base with a scalar %index, so
; every lane holds the same address. The assertions expect a scalar GEP of
; %base by %index, then an i32 GEP with a zeroinitializer <4 x i64> index that
; widens that scalar address into the pointer vector passed to the gather.
define <4 x i32> @scalar_index(i32* %base, i64 %index) {
define <4 x i32> @scalar_index(ptr %base, i64 %index) {
; CHECK-LABEL: @scalar_index(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%broadcast.splatinsert = insertelement <4 x i32*> undef, i32* %base, i32 0
%broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> undef, <4 x i32> zeroinitializer
%gep = getelementptr i32, <4 x i32*> %broadcast.splat, i64 %index
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
%broadcast.splatinsert = insertelement <4 x ptr> undef, ptr %base, i32 0
%broadcast.splat = shufflevector <4 x ptr> %broadcast.splatinsert, <4 x ptr> undef, <4 x i32> zeroinitializer
%gep = getelementptr i32, <4 x ptr> %broadcast.splat, i64 %index
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}

; Mirror image of the splatted-base case: here the base is scalar and the
; index is a splat of scalar %index. The expected output has the same shape:
; a scalar GEP of %base by %index, then an i32 GEP with a zeroinitializer
; <4 x i64> index feeding the gather.
define <4 x i32> @splat_index(i32* %base, i64 %index) {
define <4 x i32> @splat_index(ptr %base, i64 %index) {
; CHECK-LABEL: @splat_index(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[INDEX:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0
%broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
%gep = getelementptr i32, i32* %base, <4 x i64> %broadcast.splat
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
%gep = getelementptr i32, ptr %base, <4 x i64> %broadcast.splat
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %gep, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %res
}

; Gather from the constant table @glob_array using a vector of element indices.
; The input addresses the array with a [16 x i32] GEP whose last index is the
; vector %indxs. The assertions expect an i32-typed GEP whose base is the start
; of @glob_array and whose only index is that vector.
define <4 x i32> @test_global_array(<4 x i64> %indxs) {
; CHECK-LABEL: @test_global_array(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @glob_array, i64 0, i64 0), <4 x i64> [[INDXS:%.*]]
; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr @glob_array, <4 x i64> [[INDXS:%.*]]
; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[G]]
;
%p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <4 x i64> %indxs
%g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
%p = getelementptr inbounds [16 x i32], ptr @glob_array, i64 0, <4 x i64> %indxs
%g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %g
}

; Gather of field 1 of the global @c through a splat of the global's address.
; Every address operand is a constant, so the assertions expect only the gather
; call and the return: the pointer operand is a constant vector of four
; identical constant GEP expressions, and no address instructions precede the
; call.
define <4 x i32> @global_struct_splat() {
; CHECK-LABEL: @global_struct_splat(
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> <i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1), i32* getelementptr inbounds (%struct.a, %struct.a* @c, i64 0, i32 1)>, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> <ptr getelementptr inbounds (%struct.a, ptr @c, i64 0, i32 1), ptr getelementptr inbounds (%struct.a, ptr @c, i64 0, i32 1), ptr getelementptr inbounds (%struct.a, ptr @c, i64 0, i32 1), ptr getelementptr inbounds (%struct.a, ptr @c, i64 0, i32 1)>, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
;
%1 = insertelement <4 x %struct.a*> undef, %struct.a* @c, i32 0
%2 = shufflevector <4 x %struct.a*> %1, <4 x %struct.a*> undef, <4 x i32> zeroinitializer
%3 = getelementptr %struct.a, <4 x %struct.a*> %2, <4 x i64> zeroinitializer, i32 1
%4 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
%1 = insertelement <4 x ptr> undef, ptr @c, i32 0
%2 = shufflevector <4 x ptr> %1, <4 x ptr> undef, <4 x i32> zeroinitializer
%3 = getelementptr %struct.a, <4 x ptr> %2, <4 x i64> zeroinitializer, i32 1
%4 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
ret <4 x i32> %4
}

; Gather through a splatted scalar pointer with a caller-supplied mask and
; passthru, and no GEP in the input. The assertions expect the
; insertelement/shufflevector pair (undef operands) to be replaced by an i32
; GEP on scalar %ptr with a zeroinitializer <4 x i64> index; mask and passthru
; are forwarded unchanged.
define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthru) {
define <4 x i32> @splat_ptr_gather(ptr %ptr, <4 x i1> %mask, <4 x i32> %passthru) {
; CHECK-LABEL: @splat_ptr_gather(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]], <4 x i32> [[PASSTHRU:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <4 x i64> zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]], <4 x i32> [[PASSTHRU:%.*]])
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
;
%1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0
%2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer
%3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> %mask, <4 x i32> %passthru)
%1 = insertelement <4 x ptr> undef, ptr %ptr, i32 0
%2 = shufflevector <4 x ptr> %1, <4 x ptr> undef, <4 x i32> zeroinitializer
%3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %2, i32 4, <4 x i1> %mask, <4 x i32> %passthru)
ret <4 x i32> %3
}

; Scatter counterpart of the splatted-pointer gather test. The
; insertelement/shufflevector splat of %ptr (undef operands) is expected to
; become an i32 GEP on scalar %ptr with a zeroinitializer <4 x i64> index,
; which is then used as the scatter's pointer operand.
define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) {
define void @splat_ptr_scatter(ptr %ptr, <4 x i1> %mask, <4 x i32> %val) {
; CHECK-LABEL: @splat_ptr_scatter(
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], <4 x i64> zeroinitializer
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL:%.*]], <4 x i32*> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[PTR:%.*]], <4 x i64> zeroinitializer
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[VAL:%.*]], <4 x ptr> [[TMP1]], i32 4, <4 x i1> [[MASK:%.*]])
; CHECK-NEXT: ret void
;
%1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0
%2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer
call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %2, i32 4, <4 x i1> %mask)
%1 = insertelement <4 x ptr> undef, ptr %ptr, i32 0
%2 = shufflevector <4 x ptr> %1, <4 x ptr> undef, <4 x i32> zeroinitializer
call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %2, i32 4, <4 x i1> %mask)
ret void
}

; Declarations of the masked gather/scatter intrinsics called by the tests in
; this file. The `.v4p0i32` suffix is the typed-pointer mangling; `.v4p0` is
; the opaque-pointer mangling of the same intrinsics.
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
69 changes: 34 additions & 35 deletions llvm/test/Transforms/CodeGenPrepare/X86/gep-unmerging.ll
Original file line number Diff line number Diff line change
@@ -1,59 +1,58 @@
; RUN: opt -codegenprepare -S -mtriple=x86_64 < %s | FileCheck %s

; Each *_addr constant holds the blockaddress of one label inside
; @gep_unmerging; the function loads them to fill its dispatch table.
; @dummy is an i8 global that the op1/op2 blocks store into.
@exit_addr = constant i8* blockaddress(@gep_unmerging, %exit)
@op1_addr = constant i8* blockaddress(@gep_unmerging, %op1)
@op2_addr = constant i8* blockaddress(@gep_unmerging, %op2)
@op3_addr = constant i8* blockaddress(@gep_unmerging, %op3)
@exit_addr = constant ptr blockaddress(@gep_unmerging, %exit)
@op1_addr = constant ptr blockaddress(@gep_unmerging, %op1)
@op2_addr = constant ptr blockaddress(@gep_unmerging, %op2)
@op3_addr = constant ptr blockaddress(@gep_unmerging, %op3)
@dummy = global i8 0

define void @gep_unmerging(i1 %pred, i8* %p0) {
define void @gep_unmerging(i1 %pred, ptr %p0) {
entry:
%table = alloca [256 x i8*]
%table_0 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 0
%table_1 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 1
%table_2 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 2
%table_3 = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 3
%exit_a = load i8*, i8** @exit_addr
%op1_a = load i8*, i8** @op1_addr
%op2_a = load i8*, i8** @op2_addr
%op3_a = load i8*, i8** @op3_addr
store i8* %exit_a, i8** %table_0
store i8* %op1_a, i8** %table_1
store i8* %op2_a, i8** %table_2
store i8* %op3_a, i8** %table_3
%table = alloca [256 x ptr]
%table_1 = getelementptr [256 x ptr], ptr %table, i64 0, i64 1
%table_2 = getelementptr [256 x ptr], ptr %table, i64 0, i64 2
%table_3 = getelementptr [256 x ptr], ptr %table, i64 0, i64 3
%exit_a = load ptr, ptr @exit_addr
%op1_a = load ptr, ptr @op1_addr
%op2_a = load ptr, ptr @op2_addr
%op3_a = load ptr, ptr @op3_addr
store ptr %exit_a, ptr %table
store ptr %op1_a, ptr %table_1
store ptr %op2_a, ptr %table_2
store ptr %op3_a, ptr %table_3
br label %indirectbr

op1:
; CHECK-LABEL: op1:
; CHECK-NEXT: %p1_inc2 = getelementptr i8, i8* %p_preinc, i64 3
; CHECK-NEXT: %p1_inc1 = getelementptr i8, i8* %p_preinc, i64 2
%p1_inc2 = getelementptr i8, i8* %p_preinc, i64 3
%p1_inc1 = getelementptr i8, i8* %p_preinc, i64 2
%a10 = load i8, i8* %p_postinc
%a11 = load i8, i8* %p1_inc1
; CHECK-NEXT: %p1_inc2 = getelementptr i8, ptr %p_preinc, i64 3
; CHECK-NEXT: %p1_inc1 = getelementptr i8, ptr %p_preinc, i64 2
%p1_inc2 = getelementptr i8, ptr %p_preinc, i64 3
%p1_inc1 = getelementptr i8, ptr %p_preinc, i64 2
%a10 = load i8, ptr %p_postinc
%a11 = load i8, ptr %p1_inc1
%a12 = add i8 %a10, %a11
store i8 %a12, i8* @dummy
store i8 %a12, ptr @dummy
br i1 %pred, label %indirectbr, label %exit

op2:
; CHECK-LABEL: op2:
; CHECK-NEXT: %p2_inc = getelementptr i8, i8* %p_preinc, i64 2
%p2_inc = getelementptr i8, i8* %p_preinc, i64 2
%a2 = load i8, i8* %p_postinc
store i8 %a2, i8* @dummy
; CHECK-NEXT: %p2_inc = getelementptr i8, ptr %p_preinc, i64 2
%p2_inc = getelementptr i8, ptr %p_preinc, i64 2
%a2 = load i8, ptr %p_postinc
store i8 %a2, ptr @dummy
br i1 %pred, label %indirectbr, label %exit

op3:
br i1 %pred, label %indirectbr, label %exit

indirectbr:
%p_preinc = phi i8* [%p0, %entry], [%p1_inc2, %op1], [%p2_inc, %op2], [%p_postinc, %op3]
%p_postinc = getelementptr i8, i8* %p_preinc, i64 1
%next_op = load i8, i8* %p_preinc
%p_preinc = phi ptr [%p0, %entry], [%p1_inc2, %op1], [%p2_inc, %op2], [%p_postinc, %op3]
%p_postinc = getelementptr i8, ptr %p_preinc, i64 1
%next_op = load i8, ptr %p_preinc
%p_zext = zext i8 %next_op to i64
%slot = getelementptr [256 x i8*], [256 x i8*]* %table, i64 0, i64 %p_zext
%target = load i8*, i8** %slot
indirectbr i8* %target, [label %exit, label %op1, label %op2]
%slot = getelementptr [256 x ptr], ptr %table, i64 0, i64 %p_zext
%target = load ptr, ptr %slot
indirectbr ptr %target, [label %exit, label %op1, label %op2]

exit:
ret void
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/Transforms/CodeGenPrepare/X86/invariant.group.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
; Loads @tmp with !invariant.group metadata, launders the pointer with
; llvm.launder.invariant.group, then stores through the laundered pointer.
; The assertions expect the output to carry no !invariant.group metadata and no
; launder call, with the store addressed to @tmp directly.
define void @foo() {
enter:
; CHECK-NOT: !invariant.group
; CHECK-NOT: @llvm.launder.invariant.group.p0i8(
; CHECK: %val = load i8, i8* @tmp, align 1{{$}}
%val = load i8, i8* @tmp, !invariant.group !0
%ptr = call i8* @llvm.launder.invariant.group.p0i8(i8* @tmp)
; CHECK-NOT: @llvm.launder.invariant.group.p0(
; CHECK: %val = load i8, ptr @tmp, align 1{{$}}
%val = load i8, ptr @tmp, !invariant.group !0
%ptr = call ptr @llvm.launder.invariant.group.p0(ptr @tmp)

; The store must end up using @tmp itself rather than the intrinsic's result.
; CHECK: store i8 42, i8* @tmp, align 1{{$}}
store i8 42, i8* %ptr, !invariant.group !0
; CHECK: store i8 42, ptr @tmp, align 1{{$}}
store i8 42, ptr %ptr, !invariant.group !0

ret void
}
Expand All @@ -22,19 +22,19 @@ enter:
; Same scenario as the launder test, but using llvm.strip.invariant.group.
; The assertions expect no !invariant.group metadata and no strip call in the
; output, with the store addressed to @tmp directly.
define void @foo2() {
enter:
; CHECK-NOT: !invariant.group
; CHECK-NOT: @llvm.strip.invariant.group.p0i8(
; CHECK: %val = load i8, i8* @tmp, align 1{{$}}
%val = load i8, i8* @tmp, !invariant.group !0
%ptr = call i8* @llvm.strip.invariant.group.p0i8(i8* @tmp)
; CHECK-NOT: @llvm.strip.invariant.group.p0(
; CHECK: %val = load i8, ptr @tmp, align 1{{$}}
%val = load i8, ptr @tmp, !invariant.group !0
%ptr = call ptr @llvm.strip.invariant.group.p0(ptr @tmp)

; The store must end up using @tmp itself rather than the intrinsic's result.
; CHECK: store i8 42, i8* @tmp, align 1{{$}}
store i8 42, i8* %ptr, !invariant.group !0
; CHECK: store i8 42, ptr @tmp, align 1{{$}}
store i8 42, ptr %ptr, !invariant.group !0

ret void
}
; CHECK-LABEL: }


; Declarations of the launder/strip intrinsics called above, plus the empty
; metadata node !0 that the tests attach as the !invariant.group operand.
declare i8* @llvm.launder.invariant.group.p0i8(i8*)
declare i8* @llvm.strip.invariant.group.p0i8(i8*)
declare ptr @llvm.launder.invariant.group.p0(ptr)
declare ptr @llvm.strip.invariant.group.p0(ptr)
!0 = !{}
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; - attribute nobuiltin
; - TLI::has (always returns false thanks to -disable-simplify-libcalls)

; Calls @__memset_chk with the `nobuiltin` attribute at the call site and -1 as
; the last argument. The assertions expect the call to be replaced by
; llvm.memset writing %len zero bytes to %dst, so the rewrite must happen even
; though the call is marked nobuiltin.
define void @test_nobuiltin(i8* %dst, i64 %len) {
define void @test_nobuiltin(ptr %dst, i64 %len) {
; CHECK-LABEL: @test_nobuiltin(
; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 1 [[DST:%.*]], i8 0, i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 [[DST:%.*]], i8 0, i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT: ret void
;
call i8* @__memset_chk(i8* %dst, i32 0, i64 %len, i64 -1) nobuiltin
call ptr @__memset_chk(ptr %dst, i32 0, i64 %len, i64 -1) nobuiltin
ret void
}

; External declaration of the __memset_chk function called by the test above.
declare i8* @__memset_chk(i8*, i32, i64, i64)
declare ptr @__memset_chk(ptr, i32, i64, i64)
2 changes: 1 addition & 1 deletion llvm/test/Transforms/CodeGenPrepare/X86/multi-extension.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ declare void @bar(i64)
; CHECK-NEXT: %c = sext i16 %promoted to i64
define i32 @foo(i16 %kkk) {
entry:
%t4 = load i16, i16* @b, align 2
%t4 = load i16, ptr @b, align 2
%conv4 = zext i16 %t4 to i32
%or = or i16 %kkk, %t4
%c = sext i16 %or to i64
Expand Down
32 changes: 15 additions & 17 deletions llvm/test/Transforms/CodeGenPrepare/X86/nonintegral.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,15 @@ target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-ni:1"
target triple = "x86_64-unknown-linux-gnu"

define void @test_simple(i1 %cond, i64 addrspace(1)* %base) {
define void @test_simple(i1 %cond, ptr addrspace(1) %base) {
; CHECK-LABEL: @test_simple
; CHECK-NOT: inttoptr {{.*}} to i64 addrspace(1)*
; CHECK-NOT: inttoptr {{.*}} to ptr addrspace(1)
entry:
%addr = getelementptr inbounds i64, i64 addrspace(1)* %base, i64 5
%casted = bitcast i64 addrspace(1)* %addr to i32 addrspace(1)*
%addr = getelementptr inbounds i64, ptr addrspace(1) %base, i64 5
br i1 %cond, label %if.then, label %fallthrough

if.then:
%v = load i32, i32 addrspace(1)* %casted, align 4
%v = load i32, ptr addrspace(1) %addr, align 4
br label %fallthrough

fallthrough:
Expand All @@ -27,40 +26,39 @@ fallthrough:

; The address starts life as the integer %base: it is converted with inttoptr
; in address space 0, addrspacecast to addrspace(1), and offset by a GEP in the
; entry block, while the load that uses it lives in if.then. The negative
; directive below requires that the output contains no inttoptr producing an
; addrspace(1) pointer.
define void @test_inttoptr_base(i1 %cond, i64 %base) {
; CHECK-LABEL: @test_inttoptr_base
; CHECK-NOT: inttoptr {{.*}} to i64 addrspace(1)*
; CHECK-NOT: inttoptr {{.*}} to ptr addrspace(1)
entry:
; Doing the inttoptr in the integral addrspace(0), followed by an explicit
; (frontend-introduced) addrspacecast, is fine. We cannot, however, introduce
; a direct inttoptr to addrspace(1).
%baseptr = inttoptr i64 %base to i64*
%baseptrni = addrspacecast i64 *%baseptr to i64 addrspace(1)*
%addr = getelementptr inbounds i64, i64 addrspace(1)* %baseptrni, i64 5
%casted = bitcast i64 addrspace(1)* %addr to i32 addrspace(1)*
%baseptr = inttoptr i64 %base to ptr
%baseptrni = addrspacecast ptr %baseptr to ptr addrspace(1)
%addr = getelementptr inbounds i64, ptr addrspace(1) %baseptrni, i64 5
br i1 %cond, label %if.then, label %fallthrough

if.then:
%v = load i32, i32 addrspace(1)* %casted, align 4
%v = load i32, ptr addrspace(1) %addr, align 4
br label %fallthrough

fallthrough:
ret void
}

define void @test_ptrtoint_base(i1 %cond, i64 addrspace(1)* %base) {
define void @test_ptrtoint_base(i1 %cond, ptr addrspace(1) %base) {
; CHECK-LABEL: @test_ptrtoint_base
; CHECK-NOT: ptrtoint ptr addrspace(1) {{.*}} to i64
entry:
; This one is inserted by the frontend, so it's fine. We're not allowed to
; directly ptrtoint %base ourselves though
%baseptr0 = addrspacecast i64 addrspace(1)* %base to i64*
%toint = ptrtoint i64* %baseptr0 to i64
%baseptr0 = addrspacecast ptr addrspace(1) %base to ptr
%toint = ptrtoint ptr %baseptr0 to i64
%added = add i64 %toint, 8
%toptr = inttoptr i64 %added to i64*
%geped = getelementptr i64, i64* %toptr, i64 2
%toptr = inttoptr i64 %added to ptr
%geped = getelementptr i64, ptr %toptr, i64 2
br i1 %cond, label %if.then, label %fallthrough

if.then:
%v = load i64, i64* %geped, align 4
%v = load i64, ptr %geped, align 4
br label %fallthrough

fallthrough:
Expand Down
Loading