56 changes: 28 additions & 28 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion llvm/test/CodeGen/X86/vectorcall.ll
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1)

define x86_vectorcallcc void @test_mixed_7(%struct.HVA5* noalias sret %agg.result) {
; CHECK-LABEL: test_mixed_7
; X86-LABEL: test_mixed_7@@4
; X64-LABEL: test_mixed_7@@8
; X64: mov{{[ql]}} %rcx, %rax
; CHECK: movaps %xmm{{[0-9]}}, 64(%{{rcx|eax}})
; CHECK: movaps %xmm{{[0-9]}}, 48(%{{rcx|eax}})
Expand Down
52 changes: 35 additions & 17 deletions llvm/test/Transforms/Attributor/readattrs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,12 @@ define void @test6_2(i8** %p, i8* %q) {
define void @test7_1(i32* inalloca %a) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@test7_1
; IS__TUNIT____-SAME: (i32* inalloca nocapture nofree writeonly [[A:%.*]]) [[ATTR1]] {
; IS__TUNIT____-SAME: (i32* inalloca nocapture nofree nonnull writeonly dereferenceable(4) [[A:%.*]]) [[ATTR1]] {
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@test7_1
; IS__CGSCC____-SAME: (i32* inalloca nocapture nofree writeonly [[A:%.*]]) [[ATTR1]] {
; IS__CGSCC____-SAME: (i32* inalloca nocapture nofree nonnull writeonly dereferenceable(4) [[A:%.*]]) [[ATTR1]] {
; IS__CGSCC____-NEXT: ret void
;
ret void
Expand Down Expand Up @@ -163,11 +163,17 @@ declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*>, i32,
; CHECK-NOT: readnone
; CHECK-NOT: readonly
define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) {
; CHECK: Function Attrs: nounwind willreturn
; CHECK-LABEL: define {{[^@]+}}@test9
; CHECK-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) [[ATTR4:#.*]] {
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>) [[ATTR11:#.*]]
; CHECK-NEXT: ret void
; IS__TUNIT____: Function Attrs: nounwind willreturn writeonly
; IS__TUNIT____-LABEL: define {{[^@]+}}@test9
; IS__TUNIT____-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) [[ATTR4:#.*]] {
; IS__TUNIT____-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>) [[ATTR11:#.*]]
; IS__TUNIT____-NEXT: ret void
;
; IS__CGSCC____: Function Attrs: nounwind willreturn writeonly
; IS__CGSCC____-LABEL: define {{[^@]+}}@test9
; IS__CGSCC____-SAME: (<4 x i32*> [[PTRS:%.*]], <4 x i32> [[VAL:%.*]]) [[ATTR4:#.*]] {
; IS__CGSCC____-NEXT: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> [[VAL]], <4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>) [[ATTR12:#.*]]
; IS__CGSCC____-NEXT: ret void
;
call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>)
ret void
Expand All @@ -176,11 +182,17 @@ define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) {
; CHECK: declare <4 x i32> @llvm.masked.gather
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
define <4 x i32> @test10(<4 x i32*> %ptrs) {
; CHECK: Function Attrs: nounwind readonly willreturn
; CHECK-LABEL: define {{[^@]+}}@test10
; CHECK-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR5:#.*]] {
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef) [[ATTR12:#.*]]
; CHECK-NEXT: ret <4 x i32> [[RES]]
; IS__TUNIT____: Function Attrs: nounwind readonly willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@test10
; IS__TUNIT____-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR5:#.*]] {
; IS__TUNIT____-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef) [[ATTR12:#.*]]
; IS__TUNIT____-NEXT: ret <4 x i32> [[RES]]
;
; IS__CGSCC____: Function Attrs: nounwind readonly willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@test10
; IS__CGSCC____-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR5:#.*]] {
; IS__CGSCC____-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[PTRS]], i32 noundef 4, <4 x i1> noundef <i1 true, i1 false, i1 true, i1 false>, <4 x i32> undef) [[ATTR13:#.*]]
; IS__CGSCC____-NEXT: ret <4 x i32> [[RES]]
;
%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>, <4 x i32>undef)
ret <4 x i32> %res
Expand All @@ -202,11 +214,17 @@ define <4 x i32> @test11_2(<4 x i32*> %ptrs) {
declare <4 x i32> @test12_1(<4 x i32*>) argmemonly nounwind
; CHECK-NOT: readnone
define <4 x i32> @test12_2(<4 x i32*> %ptrs) {
; CHECK: Function Attrs: argmemonly nounwind
; CHECK-LABEL: define {{[^@]+}}@test12_2
; CHECK-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR7:#.*]] {
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) [[ATTR13:#.*]]
; CHECK-NEXT: ret <4 x i32> [[RES]]
; IS__TUNIT____: Function Attrs: argmemonly nounwind
; IS__TUNIT____-LABEL: define {{[^@]+}}@test12_2
; IS__TUNIT____-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR7:#.*]] {
; IS__TUNIT____-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) [[ATTR13:#.*]]
; IS__TUNIT____-NEXT: ret <4 x i32> [[RES]]
;
; IS__CGSCC____: Function Attrs: argmemonly nounwind
; IS__CGSCC____-LABEL: define {{[^@]+}}@test12_2
; IS__CGSCC____-SAME: (<4 x i32*> [[PTRS:%.*]]) [[ATTR7:#.*]] {
; IS__CGSCC____-NEXT: [[RES:%.*]] = call <4 x i32> @test12_1(<4 x i32*> [[PTRS]]) [[ATTR14:#.*]]
; IS__CGSCC____-NEXT: ret <4 x i32> [[RES]]
;
%res = call <4 x i32> @test12_1(<4 x i32*> %ptrs)
ret <4 x i32> %res
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/Transforms/Attributor/value-simplify.ll
Original file line number Diff line number Diff line change
Expand Up @@ -332,48 +332,48 @@ define i32 @ipccp3() {
define internal i32* @test_inalloca(i32* inalloca %a) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@test_inalloca
; IS__TUNIT____-SAME: (i32* inalloca noalias nofree noundef returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
; IS__TUNIT____-SAME: (i32* inalloca noalias nofree nonnull returned writeonly dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
; IS__TUNIT____-NEXT: ret i32* [[A]]
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@test_inalloca
; IS__CGSCC____-SAME: (i32* inalloca noalias nofree noundef returned writeonly align 536870912 "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
; IS__CGSCC____-SAME: (i32* inalloca noalias nofree nonnull returned writeonly dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
; IS__CGSCC____-NEXT: ret i32* [[A]]
;
ret i32* %a
}
define i32* @complicated_args_inalloca() {
define i32* @complicated_args_inalloca(i32* %arg) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@complicated_args_inalloca
; IS__TUNIT____-SAME: () [[ATTR1]] {
; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) [[ATTR1]]
; IS__TUNIT____-SAME: (i32* nofree readnone returned "no-capture-maybe-returned" [[ARG:%.*]]) [[ATTR1]] {
; IS__TUNIT____-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nofree writeonly "no-capture-maybe-returned" [[ARG]]) [[ATTR1]]
; IS__TUNIT____-NEXT: ret i32* [[CALL]]
;
; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@complicated_args_inalloca
; IS__CGSCC_OPM-SAME: () [[ATTR1:#.*]] {
; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) [[ATTR5:#.*]]
; IS__CGSCC_OPM-SAME: (i32* nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[ARG:%.*]]) [[ATTR1:#.*]] {
; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nofree nonnull writeonly dereferenceable(4) "no-capture-maybe-returned" [[ARG]]) [[ATTR5:#.*]]
; IS__CGSCC_OPM-NEXT: ret i32* [[CALL]]
;
; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@complicated_args_inalloca
; IS__CGSCC_NPM-SAME: () [[ATTR1:#.*]] {
; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nocapture nofree noundef writeonly align 536870912 null) [[ATTR4:#.*]]
; IS__CGSCC_NPM-SAME: (i32* nofree nonnull readnone returned dereferenceable(4) "no-capture-maybe-returned" [[ARG:%.*]]) [[ATTR1:#.*]] {
; IS__CGSCC_NPM-NEXT: [[CALL:%.*]] = call i32* @test_inalloca(i32* noalias nofree nonnull writeonly dereferenceable(4) "no-capture-maybe-returned" [[ARG]]) [[ATTR4:#.*]]
; IS__CGSCC_NPM-NEXT: ret i32* [[CALL]]
;
%call = call i32* @test_inalloca(i32* null)
%call = call i32* @test_inalloca(i32* %arg)
ret i32* %call
}

define internal i32* @test_preallocated(i32* preallocated(i32) %a) {
; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
; IS__TUNIT____-LABEL: define {{[^@]+}}@test_preallocated
; IS__TUNIT____-SAME: (i32* noalias nofree noundef returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
; IS__TUNIT____-SAME: (i32* noalias nofree noundef nonnull returned writeonly preallocated(i32) align 536870912 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
; IS__TUNIT____-NEXT: ret i32* [[A]]
;
; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
; IS__CGSCC____-LABEL: define {{[^@]+}}@test_preallocated
; IS__CGSCC____-SAME: (i32* noalias nofree noundef returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
; IS__CGSCC____-SAME: (i32* noalias nofree noundef nonnull returned writeonly preallocated(i32) align 536870912 dereferenceable(4) "no-capture-maybe-returned" [[A:%.*]]) [[ATTR1]] {
; IS__CGSCC____-NEXT: ret i32* [[A]]
;
ret i32* %a
Expand Down
49 changes: 45 additions & 4 deletions llvm/test/Transforms/InstCombine/AMDGPU/memcpy-from-constant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,47 @@ define i8 @memcpy_constant_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias rea
ret i8 %load
}

; Simple memcpy to alloca from byref constant address space argument.
define amdgpu_kernel void @memcpy_constant_byref_arg_ptr_to_alloca([32 x i8] addrspace(4)* noalias readonly align 4 byref([32 x i8]) %arg, i8 addrspace(1)* %out, i32 %idx) {
; CHECK-LABEL: @memcpy_constant_byref_arg_ptr_to_alloca(
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1
; CHECK-NEXT: store i8 [[LOAD]], i8 addrspace(1)* [[OUT:%.*]], align 1
; CHECK-NEXT: ret void
;
%alloca = alloca [32 x i8], align 4, addrspace(5)
%alloca.cast = bitcast [32 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
%arg.cast = bitcast [32 x i8] addrspace(4)* %arg to i8 addrspace(4)*
call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i64 32, i1 false)
%gep = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 %idx
%load = load i8, i8 addrspace(5)* %gep
store i8 %load, i8 addrspace(1)* %out
ret void
}

; Simple memcpy to alloca from byref constant address space argument, but not enough bytes are dereferenceable
define amdgpu_kernel void @memcpy_constant_byref_arg_ptr_to_alloca_too_many_bytes([31 x i8] addrspace(4)* noalias readonly align 4 byref([31 x i8]) %arg, i8 addrspace(1)* %out, i32 %idx) {
; CHECK-LABEL: @memcpy_constant_byref_arg_ptr_to_alloca_too_many_bytes(
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5)
; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 0
; CHECK-NEXT: [[ARG_CAST:%.*]] = getelementptr inbounds [31 x i8], [31 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 0
; CHECK-NEXT: call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* align 4 dereferenceable(31) [[ALLOCA_CAST]], i8 addrspace(4)* align 4 dereferenceable(31) [[ARG_CAST]], i64 31, i1 false)
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i32 0, i32 [[IDX:%.*]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(5)* [[GEP]], align 1
; CHECK-NEXT: store i8 [[LOAD]], i8 addrspace(1)* [[OUT:%.*]], align 1
; CHECK-NEXT: ret void
;
%alloca = alloca [32 x i8], align 4, addrspace(5)
%alloca.cast = bitcast [32 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
%arg.cast = bitcast [31 x i8] addrspace(4)* %arg to i8 addrspace(4)*
call void @llvm.memcpy.p5i8.p4i8.i64(i8 addrspace(5)* %alloca.cast, i8 addrspace(4)* %arg.cast, i64 31, i1 false)
%gep = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 %idx
%load = load i8, i8 addrspace(5)* %gep
store i8 %load, i8 addrspace(1)* %out
ret void
}

; Simple memcpy to alloca from constant address space intrinsic call
define amdgpu_kernel void @memcpy_constant_intrinsic_ptr_to_alloca(i8 addrspace(1)* %out, i32 %idx) {
; CHECK-LABEL: @memcpy_constant_intrinsic_ptr_to_alloca(
Expand All @@ -44,18 +85,18 @@ define amdgpu_kernel void @memcpy_constant_intrinsic_ptr_to_alloca(i8 addrspace(
}

; Alloca is written through a flat pointer
define i8 @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
define i8 @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat([31 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i32 %idx) {
; CHECK-LABEL: @memcpy_constant_arg_ptr_to_alloca_addrspacecast_to_flat(
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[IDX:%.*]] to i64
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr [31 x i8], [31 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 [[TMP1]]
; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[GEP]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%alloca = alloca [32 x i8], align 4, addrspace(5)
%alloca.cast = bitcast [32 x i8] addrspace(5)* %alloca to i8 addrspace(5)*
%alloca.cast.asc = addrspacecast i8 addrspace(5)* %alloca.cast to i8*
%arg.cast = bitcast [32 x i8] addrspace(4)* %arg to i8 addrspace(4)*
call void @llvm.memcpy.p0i8.p4i8.i64(i8* %alloca.cast.asc, i8 addrspace(4)* %arg.cast, i64 32, i1 false)
%arg.cast = bitcast [31 x i8] addrspace(4)* %arg to i8 addrspace(4)*
call void @llvm.memcpy.p0i8.p4i8.i64(i8* %alloca.cast.asc, i8 addrspace(4)* %arg.cast, i64 31, i1 false)
%gep = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 %idx
%load = load i8, i8 addrspace(5)* %gep
ret i8 %load
Expand Down