99 changes: 70 additions & 29 deletions llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,38 @@
; does not happen because when SILoadStoreOptimizer is run, the reads and writes
; are not adjacent. They are only moved later by MachineScheduler.

; GCN-LABEL: {{^}}no_clobber_ds_load_stores_x2:
; GCN: ds_write_b32
; GCN: ds_write_b32
; GCN: ds_read_b32
; GCN: ds_read_b32

; CHECK-LABEL: @no_clobber_ds_load_stores_x2
; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !1, !noalias !4
; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !1, !noalias !4
; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !4, !noalias !1
; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !4, !noalias !1

define amdgpu_kernel void @no_clobber_ds_load_stores_x2(ptr addrspace(1) %arg, i32 %i) {
; CHECK-LABEL: define amdgpu_kernel void @no_clobber_ds_load_stores_x2(
; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, align 16, !alias.scope !1, !noalias !4
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 [[I]]
; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !alias.scope !1, !noalias !4
; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), align 16, !alias.scope !4, !noalias !1
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X2_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x2.lds, i32 0, i32 1), i32 0, i32 [[I]]
; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !alias.scope !4, !noalias !1
; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_A]], [[VAL_B]]
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4
; CHECK-NEXT: ret void
;
; GCN-LABEL: no_clobber_ds_load_stores_x2:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s2, s[0:1], 0x2c
; GCN-NEXT: v_mov_b32_e32 v0, 1
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, 2
; GCN-NEXT: ds_write_b32 v1, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshl_b32 s2, s2, 2
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: ds_write_b32 v1, v2 offset:256
; GCN-NEXT: ds_read_b32 v2, v0
; GCN-NEXT: ds_read_b32 v0, v0 offset:256
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_u32_e32 v0, v2, v0
; GCN-NEXT: global_store_dword v1, v0, s[0:1]
; GCN-NEXT: s_endpgm
bb:
store i32 1, ptr addrspace(3) @a, align 4
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
Expand All @@ -35,24 +54,46 @@ bb:
ret void
}

; GCN-LABEL: {{^}}no_clobber_ds_load_stores_x3:
; GCN-DAG: ds_write_b32
; GCN-DAG: ds_write_b32
; GCN-DAG: ds_write_b32
; GCN-DAG: ds_read_b32
; GCN-DAG: ds_read_b32
; GCN-DAG: ds_read_b32

; CHECK-LABEL: @no_clobber_ds_load_stores_x3
; CHECK: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !6, !noalias !9
; CHECK: %gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 %i
; CHECK: %val.a = load i32, ptr addrspace(3) %gep.a, align 4, !alias.scope !6, !noalias !9
; CHECK: store i32 2, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !12, !noalias !13
; CHECK: %val.b = load i32, ptr addrspace(3) %gep.b, align 4, !alias.scope !12, !noalias !13
; CHECK: store i32 3, ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !14, !noalias !15
; CHECK: %val.c = load i32, ptr addrspace(3) %gep.c, align 4, !alias.scope !14, !noalias !15

define amdgpu_kernel void @no_clobber_ds_load_stores_x3(ptr addrspace(1) %arg, i32 %i) {
; CHECK-LABEL: define amdgpu_kernel void @no_clobber_ds_load_stores_x3(
; CHECK-SAME: ptr addrspace(1) [[ARG:%.*]], i32 [[I:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: store i32 1, ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, align 16, !alias.scope !6, !noalias !9
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 [[I]]
; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr addrspace(3) [[GEP_A]], align 4, !alias.scope !6, !noalias !9
; CHECK-NEXT: store i32 2, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), align 16, !alias.scope !12, !noalias !13
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 1), i32 0, i32 [[I]]
; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr addrspace(3) [[GEP_B]], align 4, !alias.scope !12, !noalias !13
; CHECK-NEXT: store i32 3, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), align 16, !alias.scope !14, !noalias !15
; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds [64 x i32], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_NO_CLOBBER_DS_LOAD_STORES_X3_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.no_clobber_ds_load_stores_x3.lds, i32 0, i32 2), i32 0, i32 [[I]]
; CHECK-NEXT: [[VAL_C:%.*]] = load i32, ptr addrspace(3) [[GEP_C]], align 4, !alias.scope !14, !noalias !15
; CHECK-NEXT: [[VAL_1:%.*]] = add i32 [[VAL_A]], [[VAL_B]]
; CHECK-NEXT: [[VAL:%.*]] = add i32 [[VAL_1]], [[VAL_C]]
; CHECK-NEXT: store i32 [[VAL]], ptr addrspace(1) [[ARG]], align 4
; CHECK-NEXT: ret void
;
; GCN-LABEL: no_clobber_ds_load_stores_x3:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s2, s[0:1], 0x2c
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, 2
; GCN-NEXT: v_mov_b32_e32 v0, 1
; GCN-NEXT: ds_write_b32 v1, v2 offset:256
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshl_b32 s2, s2, 2
; GCN-NEXT: v_mov_b32_e32 v2, 3
; GCN-NEXT: ds_write_b32 v1, v0
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: ds_write_b32 v1, v2 offset:512
; GCN-NEXT: ds_read_b32 v2, v0
; GCN-NEXT: ds_read_b32 v3, v0 offset:256
; GCN-NEXT: ds_read_b32 v0, v0 offset:512
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_u32_e32 v2, v2, v3
; GCN-NEXT: v_add_u32_e32 v0, v2, v0
; GCN-NEXT: global_store_dword v1, v0, s[0:1]
; GCN-NEXT: s_endpgm
bb:
store i32 1, ptr addrspace(3) @a, align 4
%gep.a = getelementptr inbounds [64 x i32], ptr addrspace(3) @a, i32 0, i32 %i
Expand Down
14 changes: 12 additions & 2 deletions llvm/test/CodeGen/AMDGPU/lower-module-lds-used-list.ll
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s

Expand Down Expand Up @@ -32,13 +33,22 @@

; Functions that are not called are ignored by the lowering
; Kernel whose only action is a call to @func. The assertions below expect the
; lowered kernel to gain an llvm.donothing call carrying an "ExplicitUse"
; operand bundle on @llvm.amdgcn.module.lds, placed ahead of the original call.
define amdgpu_kernel void @call_func() {
; CHECK-LABEL: define amdgpu_kernel void @call_func(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; CHECK-NEXT: call void @func()
; CHECK-NEXT: ret void
;
call void @func()
ret void
}

; CHECK-LABEL: @func()
; CHECK: %dec = atomicrmw fsub ptr addrspace(3) @llvm.amdgcn.module.lds, float 1.000000e+00 monotonic, align 8
define void @func() {
; CHECK-LABEL: define void @func() {
; CHECK-NEXT: [[DEC:%.*]] = atomicrmw fsub ptr addrspace(3) @llvm.amdgcn.module.lds, float 1.000000e+00 monotonic, align 8
; CHECK-NEXT: [[UNUSED0:%.*]] = atomicrmw add ptr addrspace(1) @ignored, i64 1 monotonic, align 8
; CHECK-NEXT: ret void
;
%dec = atomicrmw fsub ptr addrspace(3) @tolower, float 1.0 monotonic
%unused0 = atomicrmw add ptr addrspace(1) @ignored, i64 1 monotonic
ret void
Expand Down
76 changes: 49 additions & 27 deletions llvm/test/CodeGen/AMDGPU/lower-multiple-ctor-dtor.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-ctor-dtor < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=CHECK-VIS


; UTC_ARGS: --disable
@llvm.global_ctors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }, { i32, ptr, ptr } { i32 1, ptr @foo.5, ptr null }]
@llvm.global_dtors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }, { i32, ptr, ptr } { i32 1, ptr @bar.5, ptr null }]

Expand All @@ -9,32 +12,8 @@
; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @__fini_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
; CHECK: @llvm.used = appending global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini]
; UTC_ARGS: --enable

; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init() #0
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
; CHECK: while.entry:
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: call void [[CALLBACK]]()
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
; CHECK: while.end:
; CHECK-NEXT: ret void

; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini() #1
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
; CHECK: while.entry:
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: call void [[CALLBACK]]()
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
; CHECK: while.end:
; CHECK-NEXT: ret void

; CHECK-VIS: FUNC WEAK PROTECTED {{.*}} amdgcn.device.init
; CHECK-VIS: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.init.kd
Expand All @@ -57,5 +36,48 @@ define internal void @bar.5() {
ret void
}

; CHECK: attributes #0 = { "amdgpu-flat-work-group-size"="1,1" "device-init" }
; CHECK: attributes #1 = { "amdgpu-flat-work-group-size"="1,1" "device-fini" }
; CHECK-LABEL: define internal void @foo() {
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define internal void @bar() {
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define internal void @foo.5() {
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define internal void @bar.5() {
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__init_array_start, ptr addrspace(1) @__init_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
; CHECK: while.entry:
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__init_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: call void [[CALLBACK]]()
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__init_array_end
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
; CHECK: while.end:
; CHECK-NEXT: ret void
;
;
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
; CHECK: while.entry:
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: call void [[CALLBACK]]()
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
; CHECK: while.end:
; CHECK-NEXT: ret void
;
78 changes: 66 additions & 12 deletions llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1-bpfeb.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; RUN: opt -O2 %s | llvm-dis > %t1
; RUN: llc -filetype=asm -o - %t1 | FileCheck -check-prefixes=CHECK,CHECK-EB,CHECK-ALU64 %s
; RUN: llc -mattr=+alu32 -filetype=asm -o - %t1 | FileCheck -check-prefixes=CHECK,CHECK-EB,CHECK-ALU32 %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: opt -O2 -S < %s | llc -filetype=asm | FileCheck -check-prefixes=CHECK,CHECK-ALU64 %s
; RUN: opt -O2 -S < %s | llc -mattr=+alu32 -filetype=asm | FileCheck -check-prefixes=CHECK,CHECK-ALU32 %s
; Source code:
; struct s {
; unsigned long long f1;
Expand All @@ -26,6 +26,68 @@ target triple = "bpfeb"

; Function Attrs: nounwind readnone
define dso_local i32 @test(ptr %arg) local_unnamed_addr #0 !dbg !13 {
; CHECK-ALU64-LABEL: test:
; CHECK-ALU64: .Ltest$local:
; CHECK-ALU64-NEXT: .type .Ltest$local,@function
; CHECK-ALU64-NEXT: .Lfunc_begin0:
; CHECK-ALU64-NEXT: .loc 1 11 0 # test.c:11:0
; CHECK-ALU64-NEXT: .cfi_sections .debug_frame
; CHECK-ALU64-NEXT: .cfi_startproc
; CHECK-ALU64-NEXT: # %bb.0: # %entry
; CHECK-ALU64-NEXT: #DEBUG_VALUE: test:arg <- $r1
; CHECK-ALU64-NEXT: .Ltmp0:
; CHECK-ALU64-NEXT: r1 = 20
; CHECK-ALU64-NEXT: .Ltmp1:
; CHECK-ALU64-NEXT: .Ltmp2:
; CHECK-ALU64-NEXT: .Ltmp3:
; CHECK-ALU64-NEXT: r0 = 4
; CHECK-ALU64-NEXT: .Ltmp4:
; CHECK-ALU64-NEXT: .loc 1 12 69 prologue_end # test.c:12:69
; CHECK-ALU64-NEXT: .Ltmp5:
; CHECK-ALU64-NEXT: .Ltmp6:
; CHECK-ALU64-NEXT: r0 += r1
; CHECK-ALU64-NEXT: .Ltmp7:
; CHECK-ALU64-NEXT: r1 = 45
; CHECK-ALU64-NEXT: .loc 1 13 67 # test.c:13:67
; CHECK-ALU64-NEXT: .Ltmp8:
; CHECK-ALU64-NEXT: r0 += r1
; CHECK-ALU64-NEXT: .loc 1 12 3 # test.c:12:3
; CHECK-ALU64-NEXT: .Ltmp9:
; CHECK-ALU64-NEXT: exit
; CHECK-ALU64-NEXT: .Ltmp10:
; CHECK-ALU64-NEXT: .Ltmp11:
;
; CHECK-ALU32-LABEL: test:
; CHECK-ALU32: .Ltest$local:
; CHECK-ALU32-NEXT: .type .Ltest$local,@function
; CHECK-ALU32-NEXT: .Lfunc_begin0:
; CHECK-ALU32-NEXT: .loc 1 11 0 # test.c:11:0
; CHECK-ALU32-NEXT: .cfi_sections .debug_frame
; CHECK-ALU32-NEXT: .cfi_startproc
; CHECK-ALU32-NEXT: # %bb.0: # %entry
; CHECK-ALU32-NEXT: #DEBUG_VALUE: test:arg <- $r1
; CHECK-ALU32-NEXT: .Ltmp0:
; CHECK-ALU32-NEXT: r1 = 20
; CHECK-ALU32-NEXT: .Ltmp1:
; CHECK-ALU32-NEXT: .Ltmp2:
; CHECK-ALU32-NEXT: .Ltmp3:
; CHECK-ALU32-NEXT: r0 = 4
; CHECK-ALU32-NEXT: .Ltmp4:
; CHECK-ALU32-NEXT: .loc 1 12 69 prologue_end # test.c:12:69
; CHECK-ALU32-NEXT: .Ltmp5:
; CHECK-ALU32-NEXT: .Ltmp6:
; CHECK-ALU32-NEXT: w0 += w1
; CHECK-ALU32-NEXT: .Ltmp7:
; CHECK-ALU32-NEXT: r1 = 45
; CHECK-ALU32-NEXT: .loc 1 13 67 # test.c:13:67
; CHECK-ALU32-NEXT: .Ltmp8:
; CHECK-ALU32-NEXT: w0 += w1
; CHECK-ALU32-NEXT: # kill: def $w0 killed $w0 killed $r0
; CHECK-ALU32-NEXT: .loc 1 12 3 # test.c:12:3
; CHECK-ALU32-NEXT: .Ltmp9:
; CHECK-ALU32-NEXT: exit
; CHECK-ALU32-NEXT: .Ltmp10:
; CHECK-ALU32-NEXT: .Ltmp11:
entry:
call void @llvm.dbg.value(metadata ptr %arg, metadata !30, metadata !DIExpression()), !dbg !31
%0 = tail call ptr @llvm.preserve.struct.access.index.p0.p0.ss(ptr elementtype(%struct.s) %arg, i32 5, i32 6), !dbg !32, !llvm.preserve.access.index !18
Expand All @@ -37,15 +99,6 @@ entry:
ret i32 %add1, !dbg !38
}

; CHECK: r1 = 20
; CHECK: r0 = 4
; CHECK-ALU64: r0 += r1
; CHECK-ALU32: w0 += w1
; CHECK-EB: r1 = 45
; CHECK-ALU64: r0 += r1
; CHECK-ALU32: w0 += w1
; CHECK: exit

; CHECK: .long 1 # BTF_KIND_STRUCT(id = 2)

; CHECK: .byte 115 # string offset=1
Expand Down Expand Up @@ -124,3 +177,4 @@ attributes #2 = { nounwind readnone speculatable }
!36 = !DILocation(line: 14, column: 10, scope: !13)
!37 = !DILocation(line: 13, column: 67, scope: !13)
!38 = !DILocation(line: 12, column: 3, scope: !13)

77 changes: 65 additions & 12 deletions llvm/test/CodeGen/BPF/CORE/field-reloc-bitfield-1.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; RUN: opt -O2 %s | llvm-dis > %t1
; RUN: llc -filetype=asm -o - %t1 | FileCheck -check-prefixes=CHECK,CHECK-EL,CHECK-ALU64 %s
; RUN: llc -mattr=+alu32 -filetype=asm -o - %t1 | FileCheck -check-prefixes=CHECK,CHECK-EL,CHECK-ALU32 %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: opt -O2 -S < %s | llc -filetype=asm | FileCheck -check-prefixes=CHECK,CHECK-ALU64 %s
; RUN: opt -O2 -S < %s | llc -mattr=+alu32 -filetype=asm | FileCheck -check-prefixes=CHECK,CHECK-ALU32 %s
; Source code:
; struct s {
; unsigned long long f1;
Expand All @@ -26,6 +26,68 @@ target triple = "bpfel"

; Function Attrs: nounwind readnone
define dso_local i32 @test(ptr %arg) local_unnamed_addr #0 !dbg !13 {
; CHECK-ALU64-LABEL: test:
; CHECK-ALU64: .Ltest$local:
; CHECK-ALU64-NEXT: .type .Ltest$local,@function
; CHECK-ALU64-NEXT: .Lfunc_begin0:
; CHECK-ALU64-NEXT: .loc 1 11 0 # test.c:11:0
; CHECK-ALU64-NEXT: .cfi_sections .debug_frame
; CHECK-ALU64-NEXT: .cfi_startproc
; CHECK-ALU64-NEXT: # %bb.0: # %entry
; CHECK-ALU64-NEXT: #DEBUG_VALUE: test:arg <- $r1
; CHECK-ALU64-NEXT: .Ltmp0:
; CHECK-ALU64-NEXT: r1 = 20
; CHECK-ALU64-NEXT: .Ltmp1:
; CHECK-ALU64-NEXT: .Ltmp2:
; CHECK-ALU64-NEXT: .Ltmp3:
; CHECK-ALU64-NEXT: r0 = 4
; CHECK-ALU64-NEXT: .Ltmp4:
; CHECK-ALU64-NEXT: .loc 1 12 69 prologue_end # test.c:12:69
; CHECK-ALU64-NEXT: .Ltmp5:
; CHECK-ALU64-NEXT: .Ltmp6:
; CHECK-ALU64-NEXT: r0 += r1
; CHECK-ALU64-NEXT: .Ltmp7:
; CHECK-ALU64-NEXT: r1 = 50
; CHECK-ALU64-NEXT: .loc 1 13 67 # test.c:13:67
; CHECK-ALU64-NEXT: .Ltmp8:
; CHECK-ALU64-NEXT: r0 += r1
; CHECK-ALU64-NEXT: .loc 1 12 3 # test.c:12:3
; CHECK-ALU64-NEXT: .Ltmp9:
; CHECK-ALU64-NEXT: exit
; CHECK-ALU64-NEXT: .Ltmp10:
; CHECK-ALU64-NEXT: .Ltmp11:
;
; CHECK-ALU32-LABEL: test:
; CHECK-ALU32: .Ltest$local:
; CHECK-ALU32-NEXT: .type .Ltest$local,@function
; CHECK-ALU32-NEXT: .Lfunc_begin0:
; CHECK-ALU32-NEXT: .loc 1 11 0 # test.c:11:0
; CHECK-ALU32-NEXT: .cfi_sections .debug_frame
; CHECK-ALU32-NEXT: .cfi_startproc
; CHECK-ALU32-NEXT: # %bb.0: # %entry
; CHECK-ALU32-NEXT: #DEBUG_VALUE: test:arg <- $r1
; CHECK-ALU32-NEXT: .Ltmp0:
; CHECK-ALU32-NEXT: r1 = 20
; CHECK-ALU32-NEXT: .Ltmp1:
; CHECK-ALU32-NEXT: .Ltmp2:
; CHECK-ALU32-NEXT: .Ltmp3:
; CHECK-ALU32-NEXT: r0 = 4
; CHECK-ALU32-NEXT: .Ltmp4:
; CHECK-ALU32-NEXT: .loc 1 12 69 prologue_end # test.c:12:69
; CHECK-ALU32-NEXT: .Ltmp5:
; CHECK-ALU32-NEXT: .Ltmp6:
; CHECK-ALU32-NEXT: w0 += w1
; CHECK-ALU32-NEXT: .Ltmp7:
; CHECK-ALU32-NEXT: r1 = 50
; CHECK-ALU32-NEXT: .loc 1 13 67 # test.c:13:67
; CHECK-ALU32-NEXT: .Ltmp8:
; CHECK-ALU32-NEXT: w0 += w1
; CHECK-ALU32-NEXT: # kill: def $w0 killed $w0 killed $r0
; CHECK-ALU32-NEXT: .loc 1 12 3 # test.c:12:3
; CHECK-ALU32-NEXT: .Ltmp9:
; CHECK-ALU32-NEXT: exit
; CHECK-ALU32-NEXT: .Ltmp10:
; CHECK-ALU32-NEXT: .Ltmp11:
entry:
call void @llvm.dbg.value(metadata ptr %arg, metadata !30, metadata !DIExpression()), !dbg !31
%0 = tail call ptr @llvm.preserve.struct.access.index.p0.p0.ss(ptr elementtype(%struct.s) %arg, i32 5, i32 6), !dbg !32, !llvm.preserve.access.index !18
Expand All @@ -37,15 +99,6 @@ entry:
ret i32 %add1, !dbg !38
}

; CHECK: r1 = 20
; CHECK: r0 = 4
; CHECK-ALU64: r0 += r1
; CHECK-ALU32: w0 += w1
; CHECK-EL: r1 = 50
; CHECK-ALU64: r0 += r1
; CHECK-ALU32: w0 += w1
; CHECK: exit

; CHECK: .long 1 # BTF_KIND_STRUCT(id = 2)

; CHECK: .byte 115 # string offset=1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; Test -sanitizer-coverage-trace-compares=1 API declarations on a non-x86_64 arch
; RUN: opt < %s -passes='module(sancov-module)' -sanitizer-coverage-level=1 -sanitizer-coverage-trace-compares=1 -S | FileCheck %s

target triple = "i386-unknown-linux-gnu"
; Minimal function that just returns 0. The assertions below expect the
; instrumented form: the definition gains a comdat and the entry block starts
; with a call to __sanitizer_cov_trace_pc_guard on @__sancov_gen_.
define i32 @foo() #0 {
; CHECK-LABEL: define i32 @foo() comdat {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @__sanitizer_cov_trace_pc_guard(ptr @__sancov_gen_) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: ret i32 0
;
entry:
ret i32 0
}
Expand Down
106 changes: 78 additions & 28 deletions llvm/test/Transforms/CodeGenPrepare/AMDGPU/sink-addrspacecast.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -codegenprepare -mtriple=amdgcn--amdhsa < %s | FileCheck %s

; CHECK-LABEL: @no_sink_local_to_flat(
; CHECK: addrspacecast
; CHECK: br
; CHECK-NOT: addrspacecast
define i64 @no_sink_local_to_flat(i1 %pred, ptr addrspace(3) %ptr) {
; CHECK-LABEL: define i64 @no_sink_local_to_flat(
; CHECK-SAME: i1 [[PRED:%.*]], ptr addrspace(3) [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR_CAST:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr addrspace(3) [[PTR]], align 4
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[PTR_CAST]], align 4
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr addrspace(3) %ptr to ptr
br i1 %pred, label %l1, label %l2

Expand All @@ -17,11 +25,18 @@ l2:
ret i64 %v2
}

; CHECK-LABEL: @no_sink_private_to_flat(
; CHECK: addrspacecast
; CHECK: br
; CHECK-NOT: addrspacecast
define i64 @no_sink_private_to_flat(i1 %pred, ptr addrspace(5) %ptr) {
; CHECK-LABEL: define i64 @no_sink_private_to_flat(
; CHECK-SAME: i1 [[PRED:%.*]], ptr addrspace(5) [[PTR:%.*]]) {
; CHECK-NEXT: [[PTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr addrspace(5) [[PTR]], align 4
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[PTR_CAST]], align 4
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr addrspace(5) %ptr to ptr
br i1 %pred, label %l1, label %l2

Expand All @@ -35,11 +50,18 @@ l2:
}


; CHECK-LABEL: @sink_global_to_flat(
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_global_to_flat(i1 %pred, ptr addrspace(1) %ptr) {
; CHECK-LABEL: define i64 @sink_global_to_flat(
; CHECK-SAME: i1 [[PRED:%.*]], ptr addrspace(1) [[PTR:%.*]]) {
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[TMP1]], align 4
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr addrspace(1) %ptr to ptr
br i1 %pred, label %l1, label %l2

Expand All @@ -52,11 +74,18 @@ l2:
ret i64 %v2
}

; CHECK-LABEL: @sink_flat_to_global(
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_flat_to_global(i1 %pred, ptr %ptr) {
; CHECK-LABEL: define i64 @sink_flat_to_global(
; CHECK-SAME: i1 [[PRED:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 4
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(1)
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(1) [[TMP1]], align 4
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr %ptr to ptr addrspace(1)
br i1 %pred, label %l1, label %l2

Expand All @@ -69,11 +98,18 @@ l2:
ret i64 %v2
}

; CHECK-LABEL: @sink_flat_to_constant(
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_flat_to_constant(i1 %pred, ptr %ptr) {
; CHECK-LABEL: define i64 @sink_flat_to_constant(
; CHECK-SAME: i1 [[PRED:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 4
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(4)
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(4) [[TMP1]], align 4
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr %ptr to ptr addrspace(4)
br i1 %pred, label %l1, label %l2

Expand All @@ -86,11 +122,18 @@ l2:
ret i64 %v2
}

; CHECK-LABEL: @sink_flat_to_local(
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_flat_to_local(i1 %pred, ptr %ptr) {
; CHECK-LABEL: define i64 @sink_flat_to_local(
; CHECK-SAME: i1 [[PRED:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 4
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(3)
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(3) [[TMP1]], align 4
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr %ptr to ptr addrspace(3)
br i1 %pred, label %l1, label %l2

Expand All @@ -103,11 +146,18 @@ l2:
ret i64 %v2
}

; CHECK-LABEL: @sink_flat_to_private(
; CHECK-NOT: addrspacecast
; CHECK: br
; CHECK: addrspacecast
define i64 @sink_flat_to_private(i1 %pred, ptr %ptr) {
; CHECK-LABEL: define i64 @sink_flat_to_private(
; CHECK-SAME: i1 [[PRED:%.*]], ptr [[PTR:%.*]]) {
; CHECK-NEXT: br i1 [[PRED]], label [[L1:%.*]], label [[L2:%.*]]
; CHECK: l1:
; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[PTR]], align 4
; CHECK-NEXT: ret i64 [[V1]]
; CHECK: l2:
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
; CHECK-NEXT: [[V2:%.*]] = load i64, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: ret i64 [[V2]]
;
%ptr_cast = addrspacecast ptr %ptr to ptr addrspace(5)
br i1 %pred, label %l1, label %l2

Expand Down
41 changes: 23 additions & 18 deletions llvm/test/Transforms/SafeStack/X86/setjmp2.ll
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s

Expand All @@ -9,32 +10,36 @@

; setjmp/longjmp test with dynamically sized array.
; Requires protector.
; @foo allocates an i32 array whose length is the %size argument, calls
; @_setjmp (returns_twice) and then hands the array to @funcall.
;
; The autogenerated assertions inside the function are the only expectations
; for this block. They pin the following output shape:
;   * the unsafe stack pointer is loaded once and saved into a dedicated
;     dynamic-pointer alloca;
;   * the dynamic alloca becomes zext/mul/sub arithmetic on the unsafe stack
;     pointer, rounded down with "and ..., -16", and the result is stored both
;     to @__safestack_unsafe_stack_ptr and to the dynamic-pointer slot;
;   * right after the @_setjmp call the saved dynamic pointer is reloaded and
;     written back to @__safestack_unsafe_stack_ptr;
;   * the original unsafe stack pointer is restored before returning.
;
; The older hand-written directives that used to be interleaved with the IR
; were dropped: they duplicated the assertions above, came after the final
; "ret i32 0" assertion in directive order, and one of them re-defined its
; pattern variable instead of reusing it.
define i32 @foo(i32 %size) nounwind uwtable safestack {
; CHECK-LABEL: define i32 @foo(
; CHECK-SAME: i32 [[SIZE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[UNSAFE_STACK_PTR:%.*]] = load ptr, ptr @__safestack_unsafe_stack_ptr, align 8
; CHECK-NEXT: [[UNSAFE_STACK_DYNAMIC_PTR:%.*]] = alloca ptr, align 8
; CHECK-NEXT: store ptr [[UNSAFE_STACK_PTR]], ptr [[UNSAFE_STACK_DYNAMIC_PTR]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr @__safestack_unsafe_stack_ptr, align 8
; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[TMP2]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], [[TMP1]]
; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], -16
; CHECK-NEXT: [[A:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: store ptr [[A]], ptr @__safestack_unsafe_stack_ptr, align 8
; CHECK-NEXT: store ptr [[A]], ptr [[UNSAFE_STACK_DYNAMIC_PTR]], align 8
; CHECK-NEXT: [[CALL:%.*]] = call i32 @_setjmp(ptr @buf) #[[ATTR1:[0-9]+]]
; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[UNSAFE_STACK_DYNAMIC_PTR]], align 8
; CHECK-NEXT: store ptr [[TMP6]], ptr @__safestack_unsafe_stack_ptr, align 8
; CHECK-NEXT: call void @funcall(ptr [[A]])
; CHECK-NEXT: store ptr [[UNSAFE_STACK_PTR]], ptr @__safestack_unsafe_stack_ptr, align 8
; CHECK-NEXT: ret i32 0
;
entry:
%a = alloca i32, i32 %size

%call = call i32 @_setjmp(ptr @buf) returns_twice

call void @funcall(ptr %a)
ret i32 0
}

Expand Down
234 changes: 167 additions & 67 deletions llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll

Large diffs are not rendered by default.