-
Notifications
You must be signed in to change notification settings - Fork 15k
[AMDGPU][NFC] Mark GEPs in flat offset folding tests as inbounds #165426
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/ritter-x2a/10-27-_sdag_set_inbounds_when_when_computing_offsets_into_memory_objects
Are you sure you want to change the base?
Conversation
|
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-backend-amdgpu Author: Fabian Ritter (ritter-x2a) ChangesThis is in preparation for a patch that will only fold offsets into flat Basically a retry of the very similar PR #131994, as part of an updated stack For SWDEV-516125. Patch is 476.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165426.diff 21 Files Affected:
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index d89b39348ad9a..0310b7e788ddf 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -62,7 +62,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = nsw V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec
; GFX90A-NEXT: renamable $vgpr46, renamable $vcc = V_ADD_CO_U32_e64 $sgpr24, $vgpr0, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr47, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr1, killed $vcc, 0, implicit $exec
@@ -959,7 +959,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: successors: %bb.71(0x80000000)
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = nsw V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec
; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr27, implicit $exec
; GFX90A-NEXT: renamable $vgpr4, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr26, $vgpr4, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr2, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr5, killed $vcc, 0, implicit $exec
@@ -1007,12 +1007,12 @@ bb:
%i11 = icmp eq i32 %i, 0
%i12 = load i32, ptr addrspace(3) null, align 8
%i13 = zext i32 %i12 to i64
- %i14 = getelementptr i32, ptr addrspace(1) %arg, i64 %i13
+ %i14 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %i13
br i1 %arg3, label %bb15, label %bb103
bb15:
%i16 = zext i32 %i to i64
- %i17 = getelementptr i32, ptr addrspace(1) %i14, i64 %i16
+ %i17 = getelementptr inbounds i32, ptr addrspace(1) %i14, i64 %i16
%i18 = ptrtoint ptr addrspace(1) %i17 to i64
br i1 %arg4, label %bb19, label %bb20
@@ -1021,7 +1021,7 @@ bb19:
unreachable
bb20:
- %i21 = getelementptr i32, ptr addrspace(1) %i17, i64 256
+ %i21 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 256
%i22 = ptrtoint ptr addrspace(1) %i21 to i64
%i23 = inttoptr i64 %i22 to ptr
%i24 = load i8, ptr %i23, align 1
@@ -1033,7 +1033,7 @@ bb26:
unreachable
bb27:
- %i28 = getelementptr i32, ptr addrspace(1) %i17, i64 512
+ %i28 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 512
%i29 = ptrtoint ptr addrspace(1) %i28 to i64
%i30 = inttoptr i64 %i29 to ptr
%i31 = load i8, ptr %i30, align 1
@@ -1045,7 +1045,7 @@ bb33:
unreachable
bb34:
- %i35 = getelementptr i32, ptr addrspace(1) %i17, i64 768
+ %i35 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 768
%i36 = ptrtoint ptr addrspace(1) %i35 to i64
%i37 = inttoptr i64 %i36 to ptr
%i38 = load i8, ptr %i37, align 1
@@ -1057,7 +1057,7 @@ bb40:
unreachable
bb41:
- %i42 = getelementptr i32, ptr addrspace(1) %i17, i64 1024
+ %i42 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1024
%i43 = ptrtoint ptr addrspace(1) %i42 to i64
%i44 = inttoptr i64 %i43 to ptr
%i45 = load i8, ptr %i44, align 1
@@ -1069,7 +1069,7 @@ bb47:
unreachable
bb48:
- %i49 = getelementptr i32, ptr addrspace(1) %i17, i64 1280
+ %i49 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1280
%i50 = ptrtoint ptr addrspace(1) %i49 to i64
%i51 = inttoptr i64 %i50 to ptr
%i52 = load i8, ptr %i51, align 1
@@ -1081,7 +1081,7 @@ bb54:
unreachable
bb55:
- %i56 = getelementptr i32, ptr addrspace(1) %i17, i64 1536
+ %i56 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1536
%i57 = ptrtoint ptr addrspace(1) %i56 to i64
%i58 = or i64 %i57, 1
%i59 = inttoptr i64 %i58 to ptr
@@ -1113,7 +1113,7 @@ bb67:
bb68:
%i69 = zext i1 %arg5 to i8
- %i70 = getelementptr [2 x i32], ptr addrspace(1) null, i64 %i16
+ %i70 = getelementptr inbounds [2 x i32], ptr addrspace(1) null, i64 %i16
%i71 = ptrtoint ptr addrspace(1) %i70 to i64
br i1 %arg5, label %bb72, label %bb73
@@ -1122,7 +1122,7 @@ bb72:
unreachable
bb73:
- %i74 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 256
+ %i74 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 256
%i75 = ptrtoint ptr addrspace(1) %i74 to i64
%i76 = inttoptr i64 %i75 to ptr
%i77 = load i8, ptr %i76, align 1
@@ -1134,7 +1134,7 @@ bb79:
unreachable
bb80:
- %i81 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 512
+ %i81 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 512
%i82 = ptrtoint ptr addrspace(1) %i81 to i64
%i83 = or i64 %i82, 1
br i1 %arg6, label %bb84, label %bb85
@@ -1269,7 +1269,7 @@ bb174:
%i182 = select i1 %arg3, i32 %i181, i32 0
%i183 = or i32 %i182, %i154
%i184 = or i32 %i183, %i156
- %i185 = getelementptr [2 x i32], ptr addrspace(1) %arg1, i64 %i13
+ %i185 = getelementptr inbounds [2 x i32], ptr addrspace(1) %arg1, i64 %i13
br i1 %arg3, label %bb186, label %bb196
bb186:
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
index 890f4f77ed107..e509d7b2b9b1b 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -12,8 +12,8 @@
define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
; OPT-GFX7-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX7-NEXT: entry:
-; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
-; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX7: if:
@@ -28,8 +28,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
;
; OPT-GFX8-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX8-NEXT: entry:
-; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
-; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX8: if:
@@ -44,11 +44,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
;
; OPT-GFX9-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX9-NEXT: entry:
-; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX9: if:
-; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
+; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28
; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
; OPT-GFX9-NEXT: br label [[ENDIF]]
; OPT-GFX9: endif:
@@ -58,11 +58,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
;
; OPT-GFX10-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX10-NEXT: entry:
-; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX10: if:
-; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
+; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28
; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
; OPT-GFX10-NEXT: br label [[ENDIF]]
; OPT-GFX10: endif:
@@ -146,8 +146,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
- %out.gep = getelementptr i32, ptr %out, i64 999999
- %in.gep = getelementptr i32, ptr %in, i64 7
+ %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+ %in.gep = getelementptr inbounds i32, ptr %in, i64 7
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %endif, label %if
@@ -167,12 +167,12 @@ done:
define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, i32 %cond) {
; OPT-GFX7-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX7-NEXT: entry:
-; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX7: if:
; OPT-GFX7-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
-; OPT-GFX7-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
+; OPT-GFX7-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
; OPT-GFX7-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
; OPT-GFX7-NEXT: br label [[ENDIF]]
; OPT-GFX7: endif:
@@ -182,8 +182,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
;
; OPT-GFX8-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX8-NEXT: entry:
-; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
-; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
+; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7
; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX8: if:
@@ -197,12 +197,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
;
; OPT-GFX9-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX9-NEXT: entry:
-; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX9: if:
; OPT-GFX9-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
-; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
+; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
; OPT-GFX9-NEXT: br label [[ENDIF]]
; OPT-GFX9: endif:
@@ -212,12 +212,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
;
; OPT-GFX10-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX10-NEXT: entry:
-; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX10: if:
; OPT-GFX10-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
-; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
+; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28
; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
; OPT-GFX10-NEXT: br label [[ENDIF]]
; OPT-GFX10: endif:
@@ -303,8 +303,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in,
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
- %out.gep = getelementptr i32, ptr %out, i64 999999
- %in.gep = getelementptr i32, ptr %in, i64 7
+ %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+ %in.gep = getelementptr inbounds i32, ptr %in, i64 7
%cast = addrspacecast ptr %in.gep to ptr addrspace(1)
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %endif, label %if
@@ -325,12 +325,12 @@ done:
define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in, i32 %cond) {
; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
; OPT-NEXT: entry:
-; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
+; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999
; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT: if:
; OPT-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(4)
-; OPT-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i64 28
+; OPT-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 28
; OPT-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[SUNKADDR]], align 4
; OPT-NEXT: br label [[ENDIF]]
; OPT: endif:
@@ -416,8 +416,8 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
- %out.gep = getelementptr i32, ptr %out, i64 999999
- %in.gep = getelementptr i32, ptr %in, i64 7
+ %out.gep = getelementptr inbounds i32, ptr %out, i64 999999
+ %in.gep = getelementptr inbounds i32, ptr %in, i64 7
%cast = addrspacecast ptr %in.gep to ptr addrspace(4)
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %endif, label %if
@@ -438,8 +438,8 @@ done:
define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX7-NEXT: entry:
-; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX7-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -456,8 +456,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
;
; OPT-GFX8-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX8-NEXT: entry:
-; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX8-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -474,12 +474,12 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
;
; OPT-GFX9-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX9-NEXT: entry:
-; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
; OPT-GFX9-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX9: if:
-; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i8, ptr [[SUNKADDR]], align 1
; OPT-GFX9-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-GFX9-NEXT: br label [[ENDIF]]
@@ -490,8 +490,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
;
; OPT-GFX10-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX10-NEXT: entry:
-; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
-; OPT-GFX10-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
+; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024
+; OPT-GFX10-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX10-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
@@ -588,8 +588,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
entry:
- %out.gep = getelementptr i32, ptr %out, i32 1024
- %in.gep = getelementptr i8, ptr %in, i64 4095
+ %out.gep = getelementptr inbounds i32, ptr %out, i32 1024
+ %in.gep = getelementptr inbounds i8, ptr %in, i64 4095
%tid = cal...
[truncated]
|
This is in preparation for a patch that will only fold offsets into flat instructions if their addition is inbounds. Marking the GEPs inbounds here means that their output won't change with the later patch. Basically a retry of the very similar PR #131994, as part of an updated stack of PRs. For SWDEV-516125.
60a10b2 to
ed34e66
Compare
Squash of the following upstream PRs for downstream testing: - llvm#165424 - llvm#165425 - llvm#165426 - llvm#165427 Regenerated outputs for the following tests to resolve merge conflicts: - llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll - llvm/test/CodeGen/AMDGPU/preserve-wwm-copy-dst-reg.ll For SWDEV-516125.

This is in preparation for a patch that will only fold offsets into flat
instructions if their addition is inbounds. Marking the GEPs inbounds here
means that their output won't change with the later patch.
Basically a retry of the very similar PR #131994, as part of an updated stack
of PRs.
For SWDEV-516125.