diff --git a/llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll b/llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll index 74fc7b317708e..1a9c5974a547a 100644 --- a/llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32-gep-sink.ll @@ -1,10 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -codegenprepare -mtriple=arm64_32-apple-ios %s -S -o - | FileCheck %s define void @test_simple_sink(ptr %base, i64 %offset) { -; CHECK-LABEL: @test_simple_sink -; CHECK: next: -; CHECK: [[ADDR8:%.*]] = getelementptr i8, ptr %base, i64 %offset -; CHECK: load volatile i1, ptr [[ADDR8]] +; CHECK-LABEL: define void @test_simple_sink( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[OFFSET:%.*]]) { +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i1, ptr [[BASE]], i64 [[OFFSET]] +; CHECK-NEXT: [[TST:%.*]] = load i1, ptr [[ADDR]], align 1 +; CHECK-NEXT: br i1 [[TST]], label [[NEXT:%.*]], label [[END:%.*]] +; CHECK: next: +; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[OFFSET]] +; CHECK-NEXT: [[TMP1:%.*]] = load volatile i1, ptr [[SUNKADDR]], align 1 +; CHECK-NEXT: ret void +; CHECK: end: +; CHECK-NEXT: ret void +; %addr = getelementptr i1, ptr %base, i64 %offset %tst = load i1, ptr %addr br i1 %tst, label %next, label %end @@ -18,10 +27,18 @@ end: } define void @test_inbounds_sink(ptr %base, i64 %offset) { -; CHECK-LABEL: @test_inbounds_sink -; CHECK: next: -; CHECK: [[ADDR8:%.*]] = getelementptr inbounds i8, ptr %base, i64 %offset -; CHECK: load volatile i1, ptr [[ADDR8]] +; CHECK-LABEL: define void @test_inbounds_sink( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[OFFSET:%.*]]) { +; CHECK-NEXT: [[ADDR:%.*]] = getelementptr inbounds i1, ptr [[BASE]], i64 [[OFFSET]] +; CHECK-NEXT: [[TST:%.*]] = load i1, ptr [[ADDR]], align 1 +; CHECK-NEXT: br i1 [[TST]], label [[NEXT:%.*]], label [[END:%.*]] +; CHECK: next: +; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[BASE]], i64 [[OFFSET]] +; CHECK-NEXT: [[TMP1:%.*]] = load volatile i1, ptr [[SUNKADDR]], align 1 +; CHECK-NEXT: ret void +; CHECK: end: +; CHECK-NEXT: ret void +; %addr = getelementptr inbounds i1, ptr %base, i64 %offset %tst = load i1, ptr %addr br i1 %tst, label %next, label %end @@ -36,10 +53,20 @@ end: ; No address derived via an add can be guaranteed inbounds define void @test_add_sink(ptr %base, i64 %offset) { -; CHECK-LABEL: @test_add_sink -; CHECK: next: -; CHECK: [[ADDR8:%.*]] = getelementptr i8, ptr %base, i64 %offset -; CHECK: load volatile i1, ptr [[ADDR8]] +; CHECK-LABEL: define void @test_add_sink( +; CHECK-SAME: ptr [[BASE:%.*]], i64 [[OFFSET:%.*]]) { +; CHECK-NEXT: [[BASE64:%.*]] = ptrtoint ptr [[BASE]] to i64 +; CHECK-NEXT: [[ADDR64:%.*]] = add nuw nsw i64 [[BASE64]], [[OFFSET]] +; CHECK-NEXT: [[ADDR:%.*]] = inttoptr i64 [[ADDR64]] to ptr +; CHECK-NEXT: [[TST:%.*]] = load i1, ptr [[ADDR]], align 1 +; CHECK-NEXT: br i1 [[TST]], label [[NEXT:%.*]], label [[END:%.*]] +; CHECK: next: +; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[OFFSET]] +; CHECK-NEXT: [[TMP1:%.*]] = load volatile i1, ptr [[SUNKADDR]], align 1 +; CHECK-NEXT: ret void +; CHECK: end: +; CHECK-NEXT: ret void +; %base64 = ptrtoint ptr %base to i64 %addr64 = add nsw nuw i64 %base64, %offset %addr = inttoptr i64 %addr64 to ptr diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll index 344ee62b44065..b97acaa8d7b35 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor-constexpr-alias.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --version 3 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -amdgpu-lower-ctor-dtor %s | FileCheck %s ; Make sure we emit code for constructor entries that aren't direct @@ -16,29 +17,38 @@ @foo.alias = hidden alias void (), ptr @foo -;. -; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)] -; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)] -; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)] -; CHECK: @__fini_array_end = external addrspace(1) constant [0 x ptr addrspace(1)] -; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata" -; CHECK: @foo.alias = hidden alias void (), ptr @foo -;. define void @foo() { -; CHECK-LABEL: @foo( -; CHECK-NEXT: ret void -; ret void } define void @bar() addrspace(1) { -; CHECK-LABEL: @bar( -; CHECK-NEXT: ret void -; ret void } -; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init() + + +;. +; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo.alias, ptr null }, { i32, ptr, ptr } { i32 1, ptr inttoptr (i64 4096 to ptr), ptr null }] +; CHECK: @[[LLVM_GLOBAL_DTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr addrspacecast (ptr addrspace(1) @bar to ptr), ptr null }] +; CHECK: @[[__INIT_ARRAY_START:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)] +; CHECK: @[[__INIT_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)] +; CHECK: @[[__FINI_ARRAY_START:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)] +; CHECK: @[[__FINI_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)] +; CHECK: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata" +; CHECK: @[[FOO_ALIAS:[a-zA-Z0-9_$"\\.-]+]] = hidden alias void (), ptr @foo +;. +; CHECK-LABEL: define void @foo( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define void @bar( +; CHECK-SAME: ) addrspace(1) #[[ATTR0]] { +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] ; CHECK: while.entry: @@ -50,8 +60,10 @@ define void @bar() addrspace(1) { ; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] ; CHECK: while.end: ; CHECK-NEXT: ret void - -; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini() +; +; +; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini( +; CHECK-SAME: ) #[[ATTR2:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] ; CHECK: while.entry: @@ -63,6 +75,9 @@ define void @bar() addrspace(1) { ; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] ; CHECK: while.end: ; CHECK-NEXT: ret void - -; CHECK: attributes #[[ATTR0:[0-9]+]] = { "amdgpu-flat-work-group-size"="1,1" "device-init" } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { "amdgpu-flat-work-group-size"="1,1" "device-fini" } +; +;. +; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx900" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,1" "device-init" } +; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,1" "device-fini" } +;. diff --git a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll index 5c8e56dd93933..aca5886bce5f7 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-ctor-dtor.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --version 3 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-ctor-dtor < %s | FileCheck %s ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-ctor-dtor < %s | FileCheck %s @@ -11,39 +12,9 @@ @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }] @llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }] -; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)] -; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)] -; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)] -; CHECK: @__fini_array_end = external addrspace(1) constant [0 x ptr addrspace(1)] -; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini] -; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init() #0 -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] -; CHECK: while.entry: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] -; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8 -; CHECK-NEXT: call void [[CALLBACK]]() -; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1 -; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end -; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] -; CHECK: while.end: -; CHECK-NEXT: ret void -; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini() #1 -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] -; CHECK: while.entry: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] -; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8 -; CHECK-NEXT: call void [[CALLBACK]]() -; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1 -; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end -; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] -; CHECK: while.end: -; CHECK-NEXT: ret void -; CHECK-NOT: amdgcn.device. ; VISIBILITY: FUNC WEAK PROTECTED {{.*}} amdgcn.device.init ; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.init.kd @@ -73,5 +44,53 @@ define internal void @bar() { ret void } -; CHECK: attributes #0 = { "amdgpu-flat-work-group-size"="1,1" "device-init" } -; CHECK: attributes #1 = { "amdgpu-flat-work-group-size"="1,1" "device-fini" } +;. +; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }] +; CHECK: @[[LLVM_GLOBAL_DTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }] +; CHECK: @[[__INIT_ARRAY_START:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)] +; CHECK: @[[__INIT_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)] +; CHECK: @[[__FINI_ARRAY_START:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)] +; CHECK: @[[__FINI_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)] +; CHECK: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata" +;. +; CHECK-LABEL: define internal void @foo() { +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal void @bar() { +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] +; CHECK: while.entry: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] +; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: call void [[CALLBACK]]() +; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1 +; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end +; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] +; CHECK: while.entry: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] +; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: call void [[CALLBACK]]() +; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1 +; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end +; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +;. +; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,1" "device-init" } +; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,1" "device-fini" } +;. diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll index 10064664aa991..6e13cfb00a4b3 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-merge.ll @@ -1,16 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s @a = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4 @b = internal unnamed_addr addrspace(3) global [64 x i32] undef, align 4 -; CHECK-LABEL: @no_clobber_ds_load_stores_x2_preexisting_aa -; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa !1, !noalias !6 -; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !tbaa !1, !noalias !6 -; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa !1, !noalias !6 -; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !tbaa !1, !noalias !6 - define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa(ptr addrspace(1) %arg, i32 %i) { +; CHECK-LABEL: define amdgpu_kernel void @no_clobber_ds_load_stores_x2_preexisting_aa( +; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: bb: +; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, align 16, !tbaa [[TBAA1:![0-9]+]], !noalias !6 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 [[I]] +; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !tbaa [[TBAA1]], !noalias !6 +; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), align 16, !tbaa [[TBAA1]], !noalias !6 +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_PREEXISTING_AA_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2_preexisting_aa.lds, i32 0, i32 1), i32 0, i32 [[I]] +; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !tbaa [[TBAA1]], !noalias !6 +; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]] +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4 +; CHECK-NEXT: ret void +; bb: store i32 1, ptr addrspace(3) @a, align 4, !alias.scope !0, !noalias !3, !tbaa !5 %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll index d7697c903721b..fe88b7770e09c 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll @@ -10,19 +10,38 @@ ; does not happen because when SILoadStoreOptimizer is run, the reads and writes ; are not adjacent. They are only moved later by MachineScheduler. -; GCN-LABEL: {{^}}no_clobber_ds_load_stores_x2: -; GCN: ds_write_b32 -; GCN: ds_write_b32 -; GCN: ds_read_b32 -; GCN: ds_read_b32 - -; CHECK-LABEL: @no_clobber_ds_load_stores_x2 -; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !1, !noalias !4 -; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !1, !noalias !4 -; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !4, !noalias !1 -; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !4, !noalias !1 - define amdgpu_kernel void @no_clobber_ds_load_stores_x2(ptr addrspace(1) %arg, i32 %i) { +; CHECK-LABEL: define amdgpu_kernel void @no_clobber_ds_load_stores_x2( +; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: bb: +; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !1, !noalias !4 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 [[I]] +; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !alias.scope !1, !noalias !4 +; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !4, !noalias !1 +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), i32 0, i32 [[I]] +; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !alias.scope !4, !noalias !1 +; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]] +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4 +; CHECK-NEXT: ret void +; +; GCN-LABEL: no_clobber_ds_load_stores_x2: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s2, s[0:1], 0x2c +; GCN-NEXT: v_mov_b32_e32 v0, 1 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: v_mov_b32_e32 v2, 2 +; GCN-NEXT: ds_write_b32 v1, v0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshl_b32 s2, s2, 2 +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: ds_write_b32 v1, v2 offset:256 +; GCN-NEXT: ds_read_b32 v2, v0 +; GCN-NEXT: ds_read_b32 v0, v0 offset:256 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_add_u32_e32 v0, v2, v0 +; GCN-NEXT: global_store_dword v1, v0, s[0:1] +; GCN-NEXT: s_endpgm bb: store i32 1, ptr addrspace(3) @a, align 4 %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i @@ -35,24 +54,46 @@ bb: ret void } -; GCN-LABEL: {{^}}no_clobber_ds_load_stores_x3: -; GCN-DAG: ds_write_b32 -; GCN-DAG: ds_write_b32 -; GCN-DAG: ds_write_b32 -; GCN-DAG: ds_read_b32 -; GCN-DAG: ds_read_b32 -; GCN-DAG: ds_read_b32 - -; CHECK-LABEL: @no_clobber_ds_load_stores_x3 -; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !6, !noalias !9 -; CHECK: %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 %i -; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !6, !noalias !9 -; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !12, !noalias !13 -; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !12, !noalias !13 -; CHECK: store i32 3, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !14, !noalias !15 -; CHECK: %val.c = load i32, ptr addrspace(3) %gep.c, align 4, !alias.scope !14, !noalias !15 - define amdgpu_kernel void @no_clobber_ds_load_stores_x3(ptr addrspace(1) %arg, i32 %i) { +; CHECK-LABEL: define amdgpu_kernel void @no_clobber_ds_load_stores_x3( +; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: bb: +; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !6, !noalias !9 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 [[I]] +; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !alias.scope !6, !noalias !9 +; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !12, !noalias !13 +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), i32 0, i32 [[I]] +; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !alias.scope !12, !noalias !13 +; CHECK-NEXT: store i32 3, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !14, !noalias !15 +; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), i32 0, i32 [[I]] +; CHECK-NEXT: [[VAL_C:%.*]] = load i32, ptr addrspace(3) [[GEP_C]], align 4, !alias.scope !14, !noalias !15 +; CHECK-NEXT: [[VAL_1:%.*]] = add i32 [[VAL_A]], [[VAL_B]] +; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_1]], [[VAL_C]] +; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4 +; CHECK-NEXT: ret void +; +; GCN-LABEL: no_clobber_ds_load_stores_x3: +; GCN: ; %bb.0: ; %bb +; GCN-NEXT: s_load_dword s2, s[0:1], 0x2c +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: v_mov_b32_e32 v2, 2 +; GCN-NEXT: v_mov_b32_e32 v0, 1 +; GCN-NEXT: ds_write_b32 v1, v2 offset:256 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_lshl_b32 s2, s2, 2 +; GCN-NEXT: v_mov_b32_e32 v2, 3 +; GCN-NEXT: ds_write_b32 v1, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: ds_write_b32 v1, v2 offset:512 +; GCN-NEXT: ds_read_b32 v2, v0 +; GCN-NEXT: ds_read_b32 v3, v0 offset:256 +; GCN-NEXT: ds_read_b32 v0, v0 offset:512 +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_add_u32_e32 v2, v2, v3 +; GCN-NEXT: v_add_u32_e32 v0, v2, v0 +; GCN-NEXT: global_store_dword v1, v0, s[0:1] +; GCN-NEXT: s_endpgm bb: store i32 1, ptr addrspace(3) @a, align 4 %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i diff --git a/llvm/test/CodeGen/AMDGPU/lower-module-lds-used-list.ll b/llvm/test/CodeGen/AMDGPU/lower-module-lds-used-list.ll index 7b534ab76f0e4..cb34d48875d10 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-module-lds-used-list.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-module-lds-used-list.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s @@ -32,13 +33,22 @@ ; Functions that are not called are ignored by the lowering define amdgpu_kernel void @call_func() { +; CHECK-LABEL: define amdgpu_kernel void @call_func( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ] +; CHECK-NEXT: call void @func() +; CHECK-NEXT: ret void +; call void @func() ret void } -; CHECK-LABEL: @func() -; CHECK: %dec = atomicrmw fsub ptr addrspace(3) @llvm.amdgcn.module.lds, float 1.000000e+00 monotonic, align 8 define void @func() { +; CHECK-LABEL: define void @func() { +; CHECK-NEXT: [[DEC:%.*]] = atomicrmw fsub ptr addrspace(3) @llvm.amdgcn.module.lds, float 1.000000e+00 monotonic, align 8 +; CHECK-NEXT: [[UNUSED0:%.*]] = atomicrmw add ptr addrspace(1) @ignored, i64 1 monotonic, align 8 +; CHECK-NEXT: ret void +; %dec = atomicrmw fsub ptr addrspace(3) @tolower, float 1.0 monotonic %unused0 = atomicrmw add ptr addrspace(1) @ignored, i64 1 monotonic ret void diff --git a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll index 57362b71cf665..7aa1d212ed417 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll @@ -1,6 +1,9 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-ctor-dtor < %s | FileCheck %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=CHECK-VIS + +; UTC_ARGS: --disable @llvm.global_ctors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }, { i32, ptr, ptr } { i32 1, ptr @foo.5, ptr null }] @llvm.global_dtors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }, { i32, ptr, ptr } { i32 1, ptr @bar.5, ptr null }] @@ -9,32 +12,8 @@ ; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)] ; CHECK: @__fini_array_end = external addrspace(1) constant [0 x ptr addrspace(1)] ; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini] +; UTC_ARGS: --enable -; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init() #0 -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] -; CHECK: while.entry: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] -; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8 -; CHECK-NEXT: call void [[CALLBACK]]() -; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1 -; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end -; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] -; CHECK: while.end: -; CHECK-NEXT: ret void - -; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini() #1 -; CHECK-NEXT: entry: -; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] -; CHECK: while.entry: -; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] -; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8 -; CHECK-NEXT: call void [[CALLBACK]]() -; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1 -; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end -; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] -; CHECK: while.end: -; CHECK-NEXT: ret void ; CHECK-VIS: FUNC WEAK PROTECTED {{.*}} amdgcn.device.init ; CHECK-VIS: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.init.kd @@ -57,5 +36,48 @@ define internal void @bar.5() { ret void } -; CHECK: attributes #0 = { "amdgpu-flat-work-group-size"="1,1" "device-init" } -; CHECK: attributes #1 = { "amdgpu-flat-work-group-size"="1,1" "device-fini" } +; CHECK-LABEL: define internal void @foo() { +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal void @bar() { +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal void @foo.5() { +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal void @bar.5() { +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] +; CHECK: while.entry: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] +; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: call void [[CALLBACK]]() +; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1 +; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end +; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]] +; CHECK: while.entry: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ] +; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: call void [[CALLBACK]]() +; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1 +; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end +; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]] +; CHECK: while.end: +; CHECK-NEXT: ret void +; diff --git a/llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1-bpfeb.ll b/llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1-bpfeb.ll index f38cd4cb336d3..ecbfec96a19b0 100644 --- a/llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1-bpfeb.ll +++ b/llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1-bpfeb.ll @@ -1,6 +1,6 @@ -; RUN: opt -O2 %s | llvm-dis > %t1 -; RUN: llc -filetype=asm -o - %t1 | FileCheck -check-prefixes=CHECK,CHECK-EB,CHECK-ALU64 %s -; RUN: llc -mattr=+alu32 -filetype=asm -o - %t1 | FileCheck -check-prefixes=CHECK,CHECK-EB,CHECK-ALU32 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -O2 -S < %s | llc -filetype=asm | FileCheck -check-prefixes=CHECK,CHECK-ALU64 %s +; RUN: opt -O2 -S < %s | llc -mattr=+alu32 -filetype=asm | FileCheck -check-prefixes=CHECK,CHECK-ALU32 %s ; Source code: ; struct s { ; unsigned long long f1; @@ -26,6 +26,68 @@ target triple = "bpfeb" ; Function Attrs: nounwind readnone define dso_local i32 @test(ptr %arg) local_unnamed_addr #0 !dbg !13 { +; CHECK-ALU64-LABEL: test: +; CHECK-ALU64: .Ltest$local: +; CHECK-ALU64-NEXT: .type .Ltest$local,@function +; CHECK-ALU64-NEXT: .Lfunc_begin0: +; CHECK-ALU64-NEXT: .loc 1 11 0 # test.c:11:0 +; CHECK-ALU64-NEXT: .cfi_sections .debug_frame +; CHECK-ALU64-NEXT: .cfi_startproc +; CHECK-ALU64-NEXT: # %bb.0: # %entry +; CHECK-ALU64-NEXT: #DEBUG_VALUE: test:arg <- $r1 +; CHECK-ALU64-NEXT: .Ltmp0: +; CHECK-ALU64-NEXT: r1 = 20 +; CHECK-ALU64-NEXT: .Ltmp1: +; CHECK-ALU64-NEXT: .Ltmp2: +; CHECK-ALU64-NEXT: .Ltmp3: +; CHECK-ALU64-NEXT: r0 = 4 +; CHECK-ALU64-NEXT: .Ltmp4: +; CHECK-ALU64-NEXT: .loc 1 12 69 prologue_end # test.c:12:69 +; CHECK-ALU64-NEXT: .Ltmp5: +; CHECK-ALU64-NEXT: .Ltmp6: +; CHECK-ALU64-NEXT: r0 += r1 +; CHECK-ALU64-NEXT: .Ltmp7: +; CHECK-ALU64-NEXT: r1 = 45 +; CHECK-ALU64-NEXT: .loc 1 13 67 # test.c:13:67 +; CHECK-ALU64-NEXT: .Ltmp8: +; CHECK-ALU64-NEXT: r0 += r1 +; CHECK-ALU64-NEXT: .loc 1 12 3 # test.c:12:3 +; CHECK-ALU64-NEXT: .Ltmp9: +; CHECK-ALU64-NEXT: exit +; CHECK-ALU64-NEXT: .Ltmp10: +; CHECK-ALU64-NEXT: .Ltmp11: +; +; CHECK-ALU32-LABEL: test: +; CHECK-ALU32: .Ltest$local: +; CHECK-ALU32-NEXT: .type .Ltest$local,@function +; CHECK-ALU32-NEXT: .Lfunc_begin0: +; CHECK-ALU32-NEXT: .loc 1 11 0 # test.c:11:0 +; CHECK-ALU32-NEXT: .cfi_sections .debug_frame +; CHECK-ALU32-NEXT: .cfi_startproc +; CHECK-ALU32-NEXT: # %bb.0: # %entry +; CHECK-ALU32-NEXT: #DEBUG_VALUE: test:arg <- $r1 +; CHECK-ALU32-NEXT: .Ltmp0: +; CHECK-ALU32-NEXT: r1 = 20 +; CHECK-ALU32-NEXT: .Ltmp1: +; CHECK-ALU32-NEXT: .Ltmp2: +; CHECK-ALU32-NEXT: .Ltmp3: +; CHECK-ALU32-NEXT: r0 = 4 +; CHECK-ALU32-NEXT: .Ltmp4: +; CHECK-ALU32-NEXT: .loc 1 12 69 prologue_end # test.c:12:69 +; CHECK-ALU32-NEXT: .Ltmp5: +; CHECK-ALU32-NEXT: .Ltmp6: +; CHECK-ALU32-NEXT: w0 += w1 +; CHECK-ALU32-NEXT: .Ltmp7: +; CHECK-ALU32-NEXT: r1 = 45 +; CHECK-ALU32-NEXT: .loc 1 13 67 # test.c:13:67 +; CHECK-ALU32-NEXT: .Ltmp8: +; CHECK-ALU32-NEXT: w0 += w1 +; CHECK-ALU32-NEXT: # kill: def $w0 killed $w0 killed $r0 +; CHECK-ALU32-NEXT: .loc 1 12 3 # test.c:12:3 +; CHECK-ALU32-NEXT: .Ltmp9: +; CHECK-ALU32-NEXT: exit +; CHECK-ALU32-NEXT: .Ltmp10: +; CHECK-ALU32-NEXT: .Ltmp11: entry: call void @llvm.dbg.value(metadata ptr %arg, metadata !30, metadata !DIExpression()), !dbg !31 %0 = tail call ptr @llvm.preserve.struct.access.index.p0.p0.ss(ptr elementtype(%struct.s) %arg, i32 5, i32 6), !dbg !32, !llvm.preserve.access.index !18 @@ -37,15 +99,6 @@ entry: ret i32 %add1, !dbg !38 } -; CHECK: r1 = 20 -; CHECK: r0 = 4 -; CHECK-ALU64: r0 += r1 -; CHECK-ALU32: w0 += w1 -; CHECK-EB: r1 = 45 -; CHECK-ALU64: r0 += r1 -; CHECK-ALU32: w0 += w1 -; CHECK: exit - ; CHECK: .long 1 # BTF_KIND_STRUCT(id = 2) ; CHECK: .byte 115 # string offset=1 @@ -124,3 +177,4 @@ attributes #2 = { nounwind readnone speculatable } !36 = !DILocation(line: 14, column: 10, scope: !13) !37 = !DILocation(line: 13, column: 67, scope: !13) !38 = !DILocation(line: 12, column: 3, scope: !13) + diff --git a/llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1.ll b/llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1.ll index 98a333d54fd38..66a1cf291cd4c 100644 --- a/llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1.ll +++ b/llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1.ll @@ -1,6 +1,6 @@ -; RUN: opt -O2 %s | llvm-dis > %t1 -; RUN: llc -filetype=asm -o - %t1 | FileCheck -check-prefixes=CHECK,CHECK-EL,CHECK-ALU64 %s -; RUN: llc -mattr=+alu32 -filetype=asm -o - %t1 | FileCheck -check-prefixes=CHECK,CHECK-EL,CHECK-ALU32 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -O2 -S < %s | llc -filetype=asm | FileCheck -check-prefixes=CHECK,CHECK-ALU64 %s +; RUN: opt -O2 -S < %s | llc -mattr=+alu32 -filetype=asm | FileCheck -check-prefixes=CHECK,CHECK-ALU32 %s ; Source code: ; struct s { ; unsigned long long f1; @@ -26,6 +26,68 @@ target triple = "bpfel" ; Function Attrs: nounwind readnone define dso_local i32 @test(ptr %arg) local_unnamed_addr #0 !dbg !13 { +; CHECK-ALU64-LABEL: test: +; CHECK-ALU64: .Ltest$local: +; CHECK-ALU64-NEXT: .type .Ltest$local,@function +; CHECK-ALU64-NEXT: .Lfunc_begin0: +; CHECK-ALU64-NEXT: .loc 1 11 0 # test.c:11:0 +; CHECK-ALU64-NEXT: .cfi_sections .debug_frame +; CHECK-ALU64-NEXT: .cfi_startproc +; CHECK-ALU64-NEXT: # %bb.0: # %entry +; CHECK-ALU64-NEXT: #DEBUG_VALUE: test:arg <- $r1 +; CHECK-ALU64-NEXT: .Ltmp0: +; CHECK-ALU64-NEXT: r1 = 20 +; CHECK-ALU64-NEXT: .Ltmp1: +; CHECK-ALU64-NEXT: .Ltmp2: +; CHECK-ALU64-NEXT: .Ltmp3: +; CHECK-ALU64-NEXT: r0 = 4 +; CHECK-ALU64-NEXT: .Ltmp4: +; CHECK-ALU64-NEXT: .loc 1 12 69 prologue_end # test.c:12:69 +; CHECK-ALU64-NEXT: .Ltmp5: +; CHECK-ALU64-NEXT: .Ltmp6: +; CHECK-ALU64-NEXT: r0 += r1 +; CHECK-ALU64-NEXT: .Ltmp7: +; CHECK-ALU64-NEXT: r1 = 50 +; CHECK-ALU64-NEXT: .loc 1 13 67 # test.c:13:67 +; CHECK-ALU64-NEXT: .Ltmp8: +; CHECK-ALU64-NEXT: r0 += r1 +; CHECK-ALU64-NEXT: .loc 1 12 3 # test.c:12:3 +; CHECK-ALU64-NEXT: .Ltmp9: +; CHECK-ALU64-NEXT: exit +; CHECK-ALU64-NEXT: .Ltmp10: +; CHECK-ALU64-NEXT: .Ltmp11: +; +; CHECK-ALU32-LABEL: test: +; CHECK-ALU32: .Ltest$local: +; CHECK-ALU32-NEXT: .type .Ltest$local,@function +; CHECK-ALU32-NEXT: .Lfunc_begin0: +; CHECK-ALU32-NEXT: .loc 1 11 0 # test.c:11:0 +; CHECK-ALU32-NEXT: .cfi_sections .debug_frame +; CHECK-ALU32-NEXT: .cfi_startproc +; CHECK-ALU32-NEXT: # %bb.0: # %entry +; CHECK-ALU32-NEXT: #DEBUG_VALUE: test:arg <- $r1 +; CHECK-ALU32-NEXT: .Ltmp0: +; CHECK-ALU32-NEXT: r1 = 20 +; CHECK-ALU32-NEXT: .Ltmp1: +; CHECK-ALU32-NEXT: .Ltmp2: +; CHECK-ALU32-NEXT: .Ltmp3: +; CHECK-ALU32-NEXT: r0 = 4 +; CHECK-ALU32-NEXT: .Ltmp4: +; CHECK-ALU32-NEXT: .loc 1 12 69 prologue_end # test.c:12:69 +; CHECK-ALU32-NEXT: .Ltmp5: +; CHECK-ALU32-NEXT: .Ltmp6: +; CHECK-ALU32-NEXT: w0 += w1 +; CHECK-ALU32-NEXT: .Ltmp7: +; CHECK-ALU32-NEXT: r1 = 50 +; CHECK-ALU32-NEXT: .loc 1 13 67 # test.c:13:67 +; CHECK-ALU32-NEXT: .Ltmp8: +; CHECK-ALU32-NEXT: w0 += w1 +; CHECK-ALU32-NEXT: # kill: def $w0 killed $w0 killed $r0 +; CHECK-ALU32-NEXT: .loc 1 12 3 # test.c:12:3 +; CHECK-ALU32-NEXT: .Ltmp9: +; CHECK-ALU32-NEXT: exit +; CHECK-ALU32-NEXT: .Ltmp10: +; CHECK-ALU32-NEXT: .Ltmp11: entry: call void @llvm.dbg.value(metadata ptr %arg, metadata !30, metadata !DIExpression()), !dbg !31 %0 = tail call ptr @llvm.preserve.struct.access.index.p0.p0.ss(ptr elementtype(%struct.s) %arg, i32 5, i32 6), !dbg !32, !llvm.preserve.access.index !18 @@ -37,15 +99,6 @@ entry: ret i32 %add1, !dbg !38 } -; CHECK: r1 = 20 -; CHECK: r0 = 4 -; CHECK-ALU64: r0 += r1 -; CHECK-ALU32: w0 += w1 -; CHECK-EL: r1 = 50 -; CHECK-ALU64: r0 += r1 -; CHECK-ALU32: w0 += w1 -; CHECK: exit - ; CHECK: .long 1 # BTF_KIND_STRUCT(id = 2) ; CHECK: .byte 115 # string offset=1 diff --git a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll index dc9731a2cd8bc..1fcb6047797e3 100644 --- a/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll +++ b/llvm/test/Instrumentation/SanitizerCoverage/cmp-tracing-api-x86_32.ll @@ -1,8 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; Test -sanitizer-coverage-trace-compares=1 API declarations on a non-x86_64 arch ; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -S | FileCheck %s target triple = "i386-unknown-linux-gnu" define i32 @foo() #0 { +; CHECK-LABEL: define i32 @foo() comdat { +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__sanitizer_cov_trace_pc_guard(ptr @__sancov_gen_) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: ret i32 0 +; entry: ret i32 0 } diff --git a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll index 51d9e5f1ba32f..be7ae0ef3f42c 100644 --- a/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll @@ -1,10 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -S -codegenprepare -mtriple=amdgcn--amdhsa < %s | FileCheck %s -; CHECK-LABEL: @no_sink_local_to_flat( -; CHECK: addrspacecast -; CHECK: br -; CHECK-NOT: addrspacecast define i64 @no_sink_local_to_flat(i1 %pred, ptr addrspace(3) %ptr) { +; CHECK-LABEL: define i64 @no_sink_local_to_flat( +; CHECK-SAME: i1 [[PRED:%.*]], ptr addrspace(3) [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR_CAST:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr +; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]] +; CHECK: l1: +; CHECK-NEXT: [[V1:%.*]] = load i64, ptr addrspace(3) [[PTR]], align 4 +; CHECK-NEXT: ret i64 [[V1]] +; CHECK: l2: +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[PTR_CAST]], align 4 +; CHECK-NEXT: ret i64 [[V2]] +; %ptr_cast = addrspacecast ptr addrspace(3) %ptr to ptr br i1 %pred, label %l1, label %l2 @@ -17,11 +25,18 @@ l2: ret i64 %v2 } -; CHECK-LABEL: @no_sink_private_to_flat( -; CHECK: addrspacecast -; CHECK: br -; CHECK-NOT: addrspacecast define i64 @no_sink_private_to_flat(i1 %pred, ptr addrspace(5) %ptr) { +; CHECK-LABEL: define i64 @no_sink_private_to_flat( +; CHECK-SAME: i1 [[PRED:%.*]], ptr addrspace(5) [[PTR:%.*]]) { +; CHECK-NEXT: [[PTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr +; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]] +; CHECK: l1: +; CHECK-NEXT: [[V1:%.*]] = load i64, ptr addrspace(5) [[PTR]], align 4 +; CHECK-NEXT: ret i64 [[V1]] +; CHECK: l2: +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[PTR_CAST]], align 4 +; CHECK-NEXT: ret i64 [[V2]] +; %ptr_cast = addrspacecast ptr addrspace(5) %ptr to ptr br i1 %pred, label %l1, label %l2 @@ -35,11 +50,18 @@ l2: } -; CHECK-LABEL: @sink_global_to_flat( -; CHECK-NOT: addrspacecast -; CHECK: br -; CHECK: addrspacecast define i64 @sink_global_to_flat(i1 %pred, ptr addrspace(1) %ptr) { +; CHECK-LABEL: define i64 @sink_global_to_flat( +; CHECK-SAME: i1 [[PRED:%.*]], ptr addrspace(1) [[PTR:%.*]]) { +; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]] +; CHECK: l1: +; CHECK-NEXT: [[V1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: ret i64 [[V1]] +; CHECK: l2: +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[TMP1]], align 4 +; CHECK-NEXT: ret i64 [[V2]] +; %ptr_cast = addrspacecast ptr addrspace(1) %ptr to ptr br i1 %pred, label %l1, label %l2 @@ -52,11 +74,18 @@ l2: ret i64 %v2 } -; CHECK-LABEL: @sink_flat_to_global( -; CHECK-NOT: addrspacecast -; CHECK: br -; CHECK: addrspacecast define i64 @sink_flat_to_global(i1 %pred, ptr %ptr) { +; CHECK-LABEL: define i64 @sink_flat_to_global( +; CHECK-SAME: i1 [[PRED:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]] +; CHECK: l1: +; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 4 +; CHECK-NEXT: ret i64 [[V1]] +; CHECK: l2: +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(1) +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(1) [[TMP1]], align 4 +; CHECK-NEXT: ret i64 [[V2]] +; %ptr_cast = addrspacecast ptr %ptr to ptr addrspace(1) br i1 %pred, label %l1, label %l2 @@ -69,11 +98,18 @@ l2: ret i64 %v2 } -; CHECK-LABEL: @sink_flat_to_constant( -; CHECK-NOT: addrspacecast -; CHECK: br -; CHECK: addrspacecast define i64 @sink_flat_to_constant(i1 %pred, ptr %ptr) { +; CHECK-LABEL: define i64 @sink_flat_to_constant( +; CHECK-SAME: i1 [[PRED:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]] +; CHECK: l1: +; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 4 +; CHECK-NEXT: ret i64 [[V1]] +; CHECK: l2: +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(4) +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(4) [[TMP1]], align 4 +; CHECK-NEXT: ret i64 [[V2]] +; %ptr_cast = addrspacecast ptr %ptr to ptr addrspace(4) br i1 %pred, label %l1, label %l2 @@ -86,11 +122,18 @@ l2: ret i64 %v2 } -; CHECK-LABEL: @sink_flat_to_local( -; CHECK-NOT: addrspacecast -; CHECK: br -; CHECK: addrspacecast define i64 @sink_flat_to_local(i1 %pred, ptr %ptr) { +; CHECK-LABEL: define i64 @sink_flat_to_local( +; CHECK-SAME: i1 [[PRED:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]] +; CHECK: l1: +; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 4 +; CHECK-NEXT: ret i64 [[V1]] +; CHECK: l2: +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(3) +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(3) [[TMP1]], align 4 +; CHECK-NEXT: ret i64 [[V2]] +; %ptr_cast = addrspacecast ptr %ptr to ptr addrspace(3) br i1 %pred, label %l1, label %l2 @@ -103,11 +146,18 @@ l2: ret i64 %v2 } -; CHECK-LABEL: @sink_flat_to_private( -; CHECK-NOT: addrspacecast -; CHECK: br -; CHECK: addrspacecast define i64 @sink_flat_to_private(i1 %pred, ptr %ptr) { +; CHECK-LABEL: define i64 @sink_flat_to_private( +; CHECK-SAME: i1 [[PRED:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]] +; CHECK: l1: +; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 4 +; CHECK-NEXT: ret i64 [[V1]] +; CHECK: l2: +; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5) +; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(5) [[TMP1]], align 4 +; CHECK-NEXT: ret i64 [[V2]] +; %ptr_cast = addrspacecast ptr %ptr to ptr addrspace(5) br i1 %pred, label %l1, label %l2 diff --git a/llvm/test/Transforms/SafeStack/X86/setjmp2.ll b/llvm/test/Transforms/SafeStack/X86/setjmp2.ll index 97cbc2e27067b..7fbd4506bea78 100644 --- a/llvm/test/Transforms/SafeStack/X86/setjmp2.ll +++ b/llvm/test/Transforms/SafeStack/X86/setjmp2.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s ; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s @@ -9,32 +10,36 @@ ; setjmp/longjmp test with dynamically sized array. ; Requires protector. -; CHECK: @foo(i32 %[[ARG:.*]]) define i32 @foo(i32 %size) nounwind uwtable safestack { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[UNSAFE_STACK_PTR:%.*]] = load ptr, ptr @__safestack_unsafe_stack_ptr, align 8 +; CHECK-NEXT: [[UNSAFE_STACK_DYNAMIC_PTR:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: store ptr [[UNSAFE_STACK_PTR]], ptr [[UNSAFE_STACK_DYNAMIC_PTR]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SIZE]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr @__safestack_unsafe_stack_ptr, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], -16 +; CHECK-NEXT: [[A:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: store ptr [[A]], ptr @__safestack_unsafe_stack_ptr, align 8 +; CHECK-NEXT: store ptr [[A]], ptr [[UNSAFE_STACK_DYNAMIC_PTR]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = call i32 @_setjmp(ptr @buf) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[UNSAFE_STACK_DYNAMIC_PTR]], align 8 +; CHECK-NEXT: store ptr [[TMP6]], ptr @__safestack_unsafe_stack_ptr, align 8 +; CHECK-NEXT: call void @funcall(ptr [[A]]) +; CHECK-NEXT: store ptr [[UNSAFE_STACK_PTR]], ptr @__safestack_unsafe_stack_ptr, align 8 +; CHECK-NEXT: ret i32 0 +; entry: - ; CHECK: %[[SP:.*]] = load ptr, ptr @__safestack_unsafe_stack_ptr - ; CHECK-NEXT: %[[DYNPTR:.*]] = alloca ptr - ; CHECK-NEXT: store ptr %[[SP]], ptr %[[DYNPTR]] - ; CHECK-NEXT: %[[ZEXT:.*]] = zext i32 %[[ARG]] to i64 - ; CHECK-NEXT: %[[MUL:.*]] = mul i64 %[[ZEXT]], 4 - ; CHECK-NEXT: %[[SP2:.*]] = load ptr, ptr @__safestack_unsafe_stack_ptr - ; CHECK-NEXT: %[[PTRTOINT:.*]] = ptrtoint ptr %[[SP2]] to i64 - ; CHECK-NEXT: %[[SUB:.*]] = sub i64 %[[PTRTOINT]], %[[MUL]] - ; CHECK-NEXT: %[[AND:.*]] = and i64 %[[SUB]], -16 - ; CHECK-NEXT: %[[INTTOPTR:.*]] = inttoptr i64 %[[AND]] to ptr - ; CHECK-NEXT: store ptr %[[INTTOPTR]], ptr @__safestack_unsafe_stack_ptr - ; CHECK-NEXT: store ptr %[[INTTOPTR]], ptr %unsafe_stack_dynamic_ptr %a = alloca i32, i32 %size - ; CHECK: setjmp - ; CHECK-NEXT: %[[LOAD:.*]] = load ptr, ptr %[[DYNPTR]] - ; CHECK-NEXT: store ptr %[[LOAD]], ptr @__safestack_unsafe_stack_ptr %call = call i32 @_setjmp(ptr @buf) returns_twice - ; CHECK: call void @funcall(ptr %[[INTTOPTR]]) call void @funcall(ptr %a) - ; CHECK-NEXT: store ptr %[[SP:.*]], ptr @__safestack_unsafe_stack_ptr ret i32 0 } diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll index 5a76104c7a65f..5ae4ea9dad7de 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt < %s -mtriple=nvptx64-nvidia-cuda -passes=separate-const-offset-from-gep \ ; RUN: -reassociate-geps-verify-no-dead-code -S | FileCheck %s @@ -13,19 +14,35 @@ ; We should not extract any struct field indices, because fields in a struct ; may have different types. define ptr @struct(i32 %i) { +; CHECK-LABEL: define ptr @struct( +; CHECK-SAME: i32 [[I:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [1024 x %struct.S], ptr @struct_array, i64 0, i64 [[TMP0]], i32 1 +; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i64 10 +; CHECK-NEXT: ret ptr [[P2]] +; entry: %add = add nsw i32 %i, 5 %idxprom = sext i32 %add to i64 %p = getelementptr inbounds [1024 x %struct.S], ptr @struct_array, i64 0, i64 %idxprom, i32 1 ret ptr %p } -; CHECK-LABEL: @struct( -; CHECK: getelementptr [1024 x %struct.S], ptr @struct_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1 ; We should be able to trace into sext(a + b) if a + b is non-negative ; (e.g., used as an index of an inbounds GEP) and one of a and b is ; non-negative. define ptr @sext_add(i32 %i, i32 %j) { +; CHECK-LABEL: define ptr @sext_add( +; CHECK-SAME: i32 [[I:%.*]], i32 [[J:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[J]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 [[TMP2]], i64 [[TMP1]] +; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 32 +; CHECK-NEXT: ret ptr [[P1]] +; entry: %0 = add i32 %i, 1 %1 = sext i32 %0 to i64 ; inbound sext(i + 1) = sext(i) + 1 @@ -35,12 +52,6 @@ entry: %p = getelementptr inbounds [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 %1, i64 %3 ret ptr %p } -; CHECK-LABEL: @sext_add( -; CHECK-NOT: = add -; CHECK: add i32 %j, -2 -; CHECK: sext -; CHECK: getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; CHECK: getelementptr inbounds float, ptr %{{[a-zA-Z0-9]+}}, i64 32 ; We should be able to trace into sext/zext if it can be distributed to both ; operands, e.g., sext (add nsw a, b) == add nsw (sext a), (sext b) @@ -50,6 +61,16 @@ entry: ; to ; gep base, a + sext(b), c + zext(d); gep ..., 1 * 32 + 1 define ptr @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) { +; CHECK-LABEL: define ptr @ext_add_no_overflow( +; CHECK-SAME: i64 [[A:%.*]], i32 [[B:%.*]], i64 [[C:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[B]] to i64 +; CHECK-NEXT: [[I2:%.*]] = add i64 [[A]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[D]] to i64 +; CHECK-NEXT: [[J4:%.*]] = add i64 [[C]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 [[I2]], i64 [[J4]] +; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i64 33 +; CHECK-NEXT: ret ptr [[P5]] +; %b1 = add nsw i32 %b, 1 %b2 = sext i32 %b1 to i64 %i = add i64 %a, %b2 ; i = a + sext(b +nsw 1) @@ -59,12 +80,30 @@ define ptr @ext_add_no_overflow(i64 %a, i32 %b, i64 %c, i32 %d) { %p = getelementptr inbounds [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 %i, i64 %j ret ptr %p } -; CHECK-LABEL: @ext_add_no_overflow( -; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; CHECK: getelementptr inbounds float, ptr [[BASE_PTR]], i64 33 ; Verifies we handle nested sext/zext correctly. define void @sext_zext(i32 %a, i32 %b, ptr %out1, ptr %out2) { +; CHECK-LABEL: define void @sext_zext( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], ptr [[OUT1:%.*]], ptr [[OUT2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[B]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i48 +; CHECK-NEXT: [[TMP2:%.*]] = zext i48 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[A]] to i48 +; CHECK-NEXT: [[TMP4:%.*]] = zext i48 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 [[TMP4]], i64 [[TMP2]] +; CHECK-NEXT: [[P11:%.*]] = getelementptr float, ptr [[TMP5]], i64 32 +; CHECK-NEXT: store ptr [[P11]], ptr [[OUT1]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = add nsw i32 [[B]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i48 +; CHECK-NEXT: [[TMP8:%.*]] = sext i48 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[A]] to i48 +; CHECK-NEXT: [[TMP10:%.*]] = sext i48 [[TMP9]] to i64 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 [[TMP10]], i64 [[TMP8]] +; CHECK-NEXT: [[P22:%.*]] = getelementptr float, ptr [[TMP11]], i64 96 +; CHECK-NEXT: store ptr [[P22]], ptr [[OUT2]], align 8 +; CHECK-NEXT: ret void +; entry: %0 = add nsw nuw i32 %a, 1 %1 = sext i32 %0 to i48 @@ -84,15 +123,23 @@ entry: store ptr %p2, ptr %out2 ret void } -; CHECK-LABEL: @sext_zext( -; CHECK: [[BASE_PTR_1:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; CHECK: getelementptr float, ptr [[BASE_PTR_1]], i64 32 -; CHECK: [[BASE_PTR_2:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; CHECK: getelementptr float, ptr [[BASE_PTR_2]], i64 96 ; Similar to @ext_add_no_overflow, we should be able to trace into s/zext if ; its operand is an OR and the two operands of the OR have no common bits. define ptr @sext_or(i64 %a, i32 %b) { +; CHECK-LABEL: define ptr @sext_or( +; CHECK-SAME: i64 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B1:%.*]] = shl i32 [[B]], 2 +; CHECK-NEXT: [[B3:%.*]] = or i32 [[B1]], 4 +; CHECK-NEXT: [[B3_EXT:%.*]] = sext i32 [[B3]] to i64 +; CHECK-NEXT: [[J:%.*]] = add i64 [[A]], [[B3_EXT]] +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[B1]] to i64 +; CHECK-NEXT: [[I2:%.*]] = add i64 [[A]], [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 [[I2]], i64 [[J]] +; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 32 +; CHECK-NEXT: ret ptr [[P3]] +; entry: %b1 = shl i32 %b, 2 %b2 = or i32 %b1, 1 ; (b << 2) and 1 have no common bits @@ -104,14 +151,21 @@ entry: %p = getelementptr inbounds [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 %i, i64 %j ret ptr %p } -; CHECK-LABEL: @sext_or( -; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}} -; CHECK: getelementptr inbounds float, ptr [[BASE_PTR]], i64 32 ; The subexpression (b + 5) is used in both "i = a + (b + 5)" and "*out = b + ; 5". When extracting the constant offset 5, make sure "*out = b + 5" isn't ; affected. define ptr @expr(i64 %a, i64 %b, ptr %out) { +; CHECK-LABEL: define ptr @expr( +; CHECK-SAME: i64 [[A:%.*]], i64 [[B:%.*]], ptr [[OUT:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B5:%.*]] = add i64 [[B]], 5 +; CHECK-NEXT: [[I2:%.*]] = add i64 [[B]], [[A]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 [[I2]], i64 0 +; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 160 +; CHECK-NEXT: store i64 [[B5]], ptr [[OUT]], align 4 +; CHECK-NEXT: ret ptr [[P3]] +; entry: %b5 = add i64 %b, 5 %i = add i64 %b5, %a @@ -119,13 +173,22 @@ entry: store i64 %b5, ptr %out ret ptr %p } -; CHECK-LABEL: @expr( -; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 0 -; CHECK: getelementptr inbounds float, ptr [[BASE_PTR]], i64 160 -; CHECK: store i64 %b5, ptr %out ; d + sext(a +nsw (b +nsw (c +nsw 8))) => (d + sext(a) + sext(b) + sext(c)) + 8 define ptr @sext_expr(i32 %a, i32 %b, i32 %c, i64 %d) { +; CHECK-LABEL: define ptr @sext_expr( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]], i64 [[D:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[A]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[B]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[C]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], [[TMP3]] +; CHECK-NEXT: [[I1:%.*]] = add i64 [[D]], [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i64 [[I1]] +; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i64 8 +; CHECK-NEXT: ret ptr [[P2]] +; entry: %0 = add nsw i32 %c, 8 %1 = add nsw i32 %b, %0 @@ -135,28 +198,36 @@ entry: %p = getelementptr inbounds [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i64 %i ret ptr %p } -; CHECK-LABEL: @sext_expr( -; CHECK: sext i32 -; CHECK: sext i32 -; CHECK: sext i32 -; CHECK: getelementptr inbounds float, ptr %{{[a-zA-Z0-9]+}}, i64 8 ; Verifies we handle "sub" correctly. define ptr @sub(i64 %i, i64 %j) { +; CHECK-LABEL: define ptr @sub( +; CHECK-SAME: i64 [[I:%.*]], i64 [[J:%.*]]) { +; CHECK-NEXT: [[J22:%.*]] = sub i64 0, [[J]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 [[I]], i64 [[J22]] +; CHECK-NEXT: [[P3:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i64 -155 +; CHECK-NEXT: ret ptr [[P3]] +; %i2 = sub i64 %i, 5 ; i - 5 %j2 = sub i64 5, %j ; 5 - i %p = getelementptr inbounds [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 %i2, i64 %j2 ret ptr %p } -; CHECK-LABEL: @sub( -; CHECK: %[[j2:[a-zA-Z0-9]+]] = sub i64 0, %j -; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 %i, i64 %[[j2]] -; CHECK: getelementptr inbounds float, ptr [[BASE_PTR]], i64 -155 %struct.Packed = type <{ [3 x i32], [8 x i64] }> ; <> means packed ; Verifies we can emit correct uglygep if the address is not natually aligned. define ptr @packed_struct(i32 %i, i32 %j) { +; CHECK-LABEL: define ptr @packed_struct( +; CHECK-SAME: i32 [[I:%.*]], i32 [[J:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[S:%.*]] = alloca [1024 x %struct.Packed], align 16 +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[J]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [1024 x %struct.Packed], ptr [[S]], i64 0, i64 [[TMP0]], i32 1, i64 [[TMP1]] +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 100 +; CHECK-NEXT: ret ptr [[UGLYGEP]] +; entry: %s = alloca [1024 x %struct.Packed], align 16 %add = add nsw i32 %j, 3 @@ -166,14 +237,19 @@ entry: %arrayidx3 = getelementptr inbounds [1024 x %struct.Packed], ptr %s, i64 0, i64 %idxprom2, i32 1, i64 %idxprom ret ptr %arrayidx3 } -; CHECK-LABEL: @packed_struct( -; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [1024 x %struct.Packed], ptr %s, i64 0, i64 %{{[a-zA-Z0-9]+}}, i32 1, i64 %{{[a-zA-Z0-9]+}} -; CHECK: %uglygep = getelementptr inbounds i8, ptr [[BASE_PTR]], i64 100 -; CHECK-NEXT: ret ptr %uglygep ; We shouldn't be able to extract the 8 from "zext(a +nuw (b + 8))", ; because "zext(b + 8) != zext(b) + 8" define ptr @zext_expr(i32 %a, i32 %b) { +; CHECK-LABEL: define ptr @zext_expr( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[B]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[A]], [[TMP0]] +; CHECK-NEXT: [[I:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[P:%.*]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i64 [[I]] +; CHECK-NEXT: ret ptr [[P]] +; entry: %0 = add i32 %b, 8 %1 = add nuw i32 %a, %0 @@ -181,8 +257,6 @@ entry: %p = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i64 %i ret ptr %p } -; CHECK-LABEL: zext_expr( -; CHECK: getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i64 %i ; Per http://llvm.org/docs/LangRef.html#id181, the indices of a off-bound gep ; should be considered sign-extended to the pointer size. Therefore, @@ -193,20 +267,34 @@ entry: ; This test verifies we do not illegitimately extract the 8 from ; gep base, (i32 a + 8) define ptr @i32_add(i32 %a) { +; CHECK-LABEL: define ptr @i32_add( +; CHECK-SAME: i32 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[I:%.*]] = add i32 [[A]], 8 +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[I]] to i64 +; CHECK-NEXT: [[P:%.*]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i64 [[IDXPROM]] +; CHECK-NEXT: ret ptr [[P]] +; entry: %i = add i32 %a, 8 %p = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i32 %i ret ptr %p } -; CHECK-LABEL: @i32_add( -; CHECK: getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i64 %{{[a-zA-Z0-9]+}} -; CHECK-NOT: getelementptr ; Verifies that we compute the correct constant offset when the index is ; sign-extended and then zero-extended. The old version of our code failed to ; handle this case because it simply computed the constant offset as the ; sign-extended value of the constant part of the GEP index. define ptr @apint(i1 %a) { +; CHECK-LABEL: define ptr @apint( +; CHECK-SAME: i1 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = sext i1 [[A]] to i4 +; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i64 [[TMP1]] +; CHECK-NEXT: [[P1:%.*]] = getelementptr float, ptr [[TMP2]], i64 15 +; CHECK-NEXT: ret ptr [[P1]] +; entry: %0 = add nsw nuw i1 %a, 1 %1 = sext i1 %0 to i4 @@ -214,39 +302,45 @@ entry: %p = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i64 %2 ret ptr %p } -; CHECK-LABEL: @apint( -; CHECK: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i64 %{{[a-zA-Z0-9]+}} -; CHECK: getelementptr float, ptr [[BASE_PTR]], i64 15 ; Do not trace into binary operators other than ADD, SUB, and OR. define ptr @and(i64 %a) { +; CHECK-LABEL: define ptr @and( +; CHECK-SAME: i64 [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[A]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1 +; CHECK-NEXT: [[P:%.*]] = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i64 [[TMP1]] +; CHECK-NEXT: ret ptr [[P]] +; entry: %0 = shl i64 %a, 2 %1 = and i64 %0, 1 %p = getelementptr [32 x [32 x float]], ptr @float_2d_array, i64 0, i64 0, i64 %1 ret ptr %p } -; CHECK-LABEL: @and( -; CHECK: getelementptr [32 x [32 x float]], ptr @float_2d_array -; CHECK-NOT: getelementptr ; The code that rebuilds an OR expression used to be buggy, and failed on this ; test. define ptr @shl_add_or(i64 %a, ptr %ptr) { -; CHECK-LABEL: @shl_add_or( +; CHECK-LABEL: define ptr @shl_add_or( +; CHECK-SAME: i64 [[A:%.*]], ptr [[PTR:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[A]], 2 +; CHECK-NEXT: [[OR2:%.*]] = add i64 [[SHL]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[PTR]], i64 [[OR2]] +; CHECK-NEXT: [[P3:%.*]] = getelementptr float, ptr [[TMP0]], i64 12 +; CHECK-NEXT: ret ptr [[P3]] +; entry: %shl = shl i64 %a, 2 %add = add i64 %shl, 12 %or = or i64 %add, 1 -; CHECK: [[OR:%or[0-9]*]] = add i64 %shl, 1 ; ((a << 2) + 12) and 1 have no common bits. Therefore, ; SeparateConstOffsetFromGEP is able to extract the 12. ; TODO(jingyue): We could reassociate the expression to combine 12 and 1. %p = getelementptr float, ptr %ptr, i64 %or -; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr float, ptr %ptr, i64 [[OR]] -; CHECK: getelementptr float, ptr [[PTR]], i64 12 ret ptr %p -; CHECK-NEXT: ret } ; The source code used to be buggy in checking @@ -260,40 +354,46 @@ entry: %struct1 = type { i64, %struct2 } %struct0 = type { i32, i32, ptr, [100 x %struct1] } define ptr @sign_mod_unsign(ptr %ptr, i64 %idx) { -; CHECK-LABEL: @sign_mod_unsign( +; CHECK-LABEL: define ptr @sign_mod_unsign( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 +; CHECK-NEXT: [[PTR22:%.*]] = getelementptr inbounds [[STRUCT2:%.*]], ptr [[TMP0]], i64 -3 +; CHECK-NEXT: ret ptr [[PTR22]] +; entry: %arrayidx = add nsw i64 %idx, -2 -; CHECK-NOT: add %ptr2 = getelementptr inbounds %struct0, ptr %ptr, i64 0, i32 3, i64 %arrayidx, i32 1 -; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, ptr %ptr, i64 0, i32 3, i64 %idx, i32 1 -; CHECK: getelementptr inbounds %struct2, ptr [[PTR]], i64 -3 ret ptr %ptr2 -; CHECK-NEXT: ret } ; Check that we can see through explicit trunc() instruction. define ptr @trunk_explicit(ptr %ptr, i64 %idx) { -; CHECK-LABEL: @trunk_explicit( +; CHECK-LABEL: define ptr @trunk_explicit( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds [[STRUCT2:%.*]], ptr [[TMP0]], i64 151 +; CHECK-NEXT: ret ptr [[PTR21]] +; entry: %idx0 = trunc i64 1 to i32 %ptr2 = getelementptr inbounds %struct0, ptr %ptr, i32 %idx0, i32 3, i64 %idx, i32 1 -; CHECK-NOT: trunc -; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, ptr %ptr, i64 0, i32 3, i64 %idx, i32 1 -; CHECK: getelementptr inbounds %struct2, ptr %0, i64 151 ret ptr %ptr2 -; CHECK-NEXT: ret } ; Check that we can deal with trunc inserted by ; canonicalizeArrayIndicesToPointerSize() if size of an index is larger than ; that of the pointer. define ptr @trunk_long_idx(ptr %ptr, i64 %idx) { -; CHECK-LABEL: @trunk_long_idx( +; CHECK-LABEL: define ptr @trunk_long_idx( +; CHECK-SAME: ptr [[PTR:%.*]], i64 [[IDX:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT0:%.*]], ptr [[PTR]], i64 0, i32 3, i64 [[IDX]], i32 1 +; CHECK-NEXT: [[PTR21:%.*]] = getelementptr inbounds [[STRUCT2:%.*]], ptr [[TMP0]], i64 151 +; CHECK-NEXT: ret ptr [[PTR21]] +; entry: %ptr2 = getelementptr inbounds %struct0, ptr %ptr, i65 1, i32 3, i64 %idx, i32 1 -; CHECK-NOT: trunc -; CHECK: [[PTR:%[a-zA-Z0-9]+]] = getelementptr %struct0, ptr %ptr, i64 0, i32 3, i64 %idx, i32 1 -; CHECK: getelementptr inbounds %struct2, ptr %0, i64 151 ret ptr %ptr2 -; CHECK-NEXT: ret }