diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index d89b39348ad9a..0310b7e788ddf 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -62,7 +62,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr56_sgpr57:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000FF, $vgpr4_vgpr5:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr40_sgpr41 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec + ; GFX90A-NEXT: renamable $vgpr0_vgpr1 = nsw V_LSHLREV_B64_e64 2, $vgpr4_vgpr5, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr25, implicit $exec ; GFX90A-NEXT: renamable $vgpr46, renamable $vcc = V_ADD_CO_U32_e64 $sgpr24, $vgpr0, 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr47, dead renamable $vcc = V_ADDC_U32_e64 killed $vgpr2, killed $vgpr1, killed $vcc, 0, implicit $exec @@ -959,7 +959,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: successors: %bb.71(0x80000000) ; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr12, $vgpr17, $vgpr30, $vgpr31, $agpr0_agpr1:0x0000000000000003, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr18_sgpr19, $sgpr34_sgpr35, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000C, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr14_vgpr15:0x000000000000000C, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000C, $vgpr24_vgpr25:0x000000000000000C, $vgpr26_vgpr27:0x000000000000000C, $vgpr28_vgpr29:0x000000000000000C, $vgpr32_vgpr33:0x0000000000000003, $vgpr34_vgpr35:0x0000000000000003, $vgpr36_vgpr37:0x0000000000000003, $vgpr38_vgpr39:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr48_vgpr49:0x0000000000000003, $vgpr50_vgpr51:0x0000000000000003, $vgpr52_vgpr53:0x0000000000000003, $vgpr54_vgpr55:0x0000000000000003, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec + ; GFX90A-NEXT: renamable $vgpr4_vgpr5 = nsw V_LSHLREV_B64_e64 3, killed $vgpr4_vgpr5, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = COPY renamable $sgpr27, implicit $exec ; GFX90A-NEXT: renamable $vgpr4, renamable $vcc = V_ADD_CO_U32_e64 killed $sgpr26, $vgpr4, 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr2, dead renamable $vcc = V_ADDC_U32_e64 
killed $vgpr2, killed $vgpr5, killed $vcc, 0, implicit $exec @@ -1007,12 +1007,12 @@ bb: %i11 = icmp eq i32 %i, 0 %i12 = load i32, ptr addrspace(3) null, align 8 %i13 = zext i32 %i12 to i64 - %i14 = getelementptr i32, ptr addrspace(1) %arg, i64 %i13 + %i14 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %i13 br i1 %arg3, label %bb15, label %bb103 bb15: %i16 = zext i32 %i to i64 - %i17 = getelementptr i32, ptr addrspace(1) %i14, i64 %i16 + %i17 = getelementptr inbounds i32, ptr addrspace(1) %i14, i64 %i16 %i18 = ptrtoint ptr addrspace(1) %i17 to i64 br i1 %arg4, label %bb19, label %bb20 @@ -1021,7 +1021,7 @@ bb19: unreachable bb20: - %i21 = getelementptr i32, ptr addrspace(1) %i17, i64 256 + %i21 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 256 %i22 = ptrtoint ptr addrspace(1) %i21 to i64 %i23 = inttoptr i64 %i22 to ptr %i24 = load i8, ptr %i23, align 1 @@ -1033,7 +1033,7 @@ bb26: unreachable bb27: - %i28 = getelementptr i32, ptr addrspace(1) %i17, i64 512 + %i28 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 512 %i29 = ptrtoint ptr addrspace(1) %i28 to i64 %i30 = inttoptr i64 %i29 to ptr %i31 = load i8, ptr %i30, align 1 @@ -1045,7 +1045,7 @@ bb33: unreachable bb34: - %i35 = getelementptr i32, ptr addrspace(1) %i17, i64 768 + %i35 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 768 %i36 = ptrtoint ptr addrspace(1) %i35 to i64 %i37 = inttoptr i64 %i36 to ptr %i38 = load i8, ptr %i37, align 1 @@ -1057,7 +1057,7 @@ bb40: unreachable bb41: - %i42 = getelementptr i32, ptr addrspace(1) %i17, i64 1024 + %i42 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1024 %i43 = ptrtoint ptr addrspace(1) %i42 to i64 %i44 = inttoptr i64 %i43 to ptr %i45 = load i8, ptr %i44, align 1 @@ -1069,7 +1069,7 @@ bb47: unreachable bb48: - %i49 = getelementptr i32, ptr addrspace(1) %i17, i64 1280 + %i49 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1280 %i50 = ptrtoint ptr addrspace(1) %i49 to i64 %i51 = inttoptr i64 %i50 to ptr %i52 = load i8, ptr %i51, align 1 @@ -1081,7 +1081,7 @@ bb54: unreachable bb55: - %i56 = getelementptr i32, ptr addrspace(1) %i17, i64 1536 + %i56 = getelementptr inbounds i32, ptr addrspace(1) %i17, i64 1536 %i57 = ptrtoint ptr addrspace(1) %i56 to i64 %i58 = or i64 %i57, 1 %i59 = inttoptr i64 %i58 to ptr @@ -1113,7 +1113,7 @@ bb67: bb68: %i69 = zext i1 %arg5 to i8 - %i70 = getelementptr [2 x i32], ptr addrspace(1) null, i64 %i16 + %i70 = getelementptr inbounds [2 x i32], ptr addrspace(1) null, i64 %i16 %i71 = ptrtoint ptr addrspace(1) %i70 to i64 br i1 %arg5, label %bb72, label %bb73 @@ -1122,7 +1122,7 @@ bb72: unreachable bb73: - %i74 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 256 + %i74 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 256 %i75 = ptrtoint ptr addrspace(1) %i74 to i64 %i76 = inttoptr i64 %i75 to ptr %i77 = load i8, ptr %i76, align 1 @@ -1134,7 +1134,7 @@ bb79: unreachable bb80: - %i81 = getelementptr [2 x i32], ptr addrspace(1) %i70, i64 512 + %i81 = getelementptr inbounds [2 x i32], ptr addrspace(1) %i70, i64 512 %i82 = ptrtoint ptr addrspace(1) %i81 to i64 %i83 = or i64 %i82, 1 br i1 %arg6, label %bb84, label %bb85 @@ -1269,7 +1269,7 @@ bb174: %i182 = select i1 %arg3, i32 %i181, i32 0 %i183 = or i32 %i182, %i154 %i184 = or i32 %i183, %i156 - %i185 = getelementptr [2 x i32], ptr addrspace(1) %arg1, i64 %i13 + %i185 = getelementptr inbounds [2 x i32], ptr addrspace(1) %arg1, i64 %i13 br i1 %arg3, label %bb186, label %bb196 bb186: diff --git 
a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll index 890f4f77ed107..e509d7b2b9b1b 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll @@ -12,8 +12,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) { ; OPT-GFX7-LABEL: @test_sinkable_flat_small_offset_i32( ; OPT-GFX7-NEXT: entry: -; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 -; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7 +; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7 ; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX7: if: @@ -28,8 +28,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) { ; ; OPT-GFX8-LABEL: @test_sinkable_flat_small_offset_i32( ; OPT-GFX8-NEXT: entry: -; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 -; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7 +; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7 ; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX8: if: @@ -44,11 +44,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) { ; ; OPT-GFX9-LABEL: @test_sinkable_flat_small_offset_i32( ; OPT-GFX9-NEXT: entry: -; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 ; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX9: if: -; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28 +; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28 ; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4 ; OPT-GFX9-NEXT: br label [[ENDIF]] ; OPT-GFX9: endif: @@ -58,11 +58,11 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) { ; ; OPT-GFX10-LABEL: @test_sinkable_flat_small_offset_i32( ; OPT-GFX10-NEXT: entry: -; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 ; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX10: if: -; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28 +; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 28 ; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4 ; OPT-GFX10-NEXT: br label [[ENDIF]] ; OPT-GFX10: endif: @@ -146,8 +146,8 @@ define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) { ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: - %out.gep = getelementptr i32, ptr %out, i64 999999 - %in.gep = getelementptr i32, ptr %in, i64 7 + %out.gep = getelementptr inbounds i32, ptr %out, i64 999999 + %in.gep = 
getelementptr inbounds i32, ptr %in, i64 7 %cmp0 = icmp eq i32 %cond, 0 br i1 %cmp0, label %endif, label %if @@ -167,12 +167,12 @@ done: define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, i32 %cond) { ; OPT-GFX7-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32( ; OPT-GFX7-NEXT: entry: -; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 ; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX7: if: ; OPT-GFX7-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1) -; OPT-GFX7-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28 +; OPT-GFX7-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28 ; OPT-GFX7-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4 ; OPT-GFX7-NEXT: br label [[ENDIF]] ; OPT-GFX7: endif: @@ -182,8 +182,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, ; ; OPT-GFX8-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32( ; OPT-GFX8-NEXT: entry: -; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 -; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7 +; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 7 ; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX8: if: @@ -197,12 +197,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, ; ; OPT-GFX9-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32( ; OPT-GFX9-NEXT: entry: -; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 ; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX9: if: ; OPT-GFX9-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1) -; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28 +; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28 ; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4 ; OPT-GFX9-NEXT: br label [[ENDIF]] ; OPT-GFX9: endif: @@ -212,12 +212,12 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, ; ; OPT-GFX10-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32( ; OPT-GFX10-NEXT: entry: -; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 +; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 ; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX10: if: ; OPT-GFX10-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1) -; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28 +; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 28 ; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4 ; OPT-GFX10-NEXT: br label 
[[ENDIF]] ; OPT-GFX10: endif: @@ -303,8 +303,8 @@ define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: - %out.gep = getelementptr i32, ptr %out, i64 999999 - %in.gep = getelementptr i32, ptr %in, i64 7 + %out.gep = getelementptr inbounds i32, ptr %out, i64 999999 + %in.gep = getelementptr inbounds i32, ptr %in, i64 7 %cast = addrspacecast ptr %in.gep to ptr addrspace(1) %cmp0 = icmp eq i32 %cond, 0 br i1 %cmp0, label %endif, label %if @@ -325,12 +325,12 @@ done: define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in, i32 %cond) { ; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32( ; OPT-NEXT: entry: -; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999 +; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 999999 ; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0 ; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT: if: ; OPT-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(4) -; OPT-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i64 28 +; OPT-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 28 ; OPT-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[SUNKADDR]], align 4 ; OPT-NEXT: br label [[ENDIF]] ; OPT: endif: @@ -416,8 +416,8 @@ define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: - %out.gep = getelementptr i32, ptr %out, i64 999999 - %in.gep = getelementptr i32, ptr %in, i64 7 + %out.gep = getelementptr inbounds i32, ptr %out, i64 999999 + %in.gep = getelementptr inbounds i32, ptr %in, i64 7 %cast = addrspacecast ptr %in.gep to ptr addrspace(4) %cmp0 = icmp eq i32 %cond, 0 br i1 %cmp0, label %endif, label %if @@ -438,8 +438,8 @@ done: define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 { ; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset( ; OPT-GFX7-NEXT: entry: -; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024 -; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095 +; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024 +; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095 ; OPT-GFX7-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]] ; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0 ; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] @@ -456,8 +456,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 { ; ; OPT-GFX8-LABEL: @test_sink_flat_small_max_flat_offset( ; OPT-GFX8-NEXT: entry: -; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024 -; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095 +; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024 +; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095 ; OPT-GFX8-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]] ; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0 ; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] @@ -474,12 +474,12 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 { ; ; OPT-GFX9-LABEL: 
@test_sink_flat_small_max_flat_offset( ; OPT-GFX9-NEXT: entry: -; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024 +; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024 ; OPT-GFX9-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]] ; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0 ; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] ; OPT-GFX9: if: -; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095 +; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095 ; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i8, ptr [[SUNKADDR]], align 1 ; OPT-GFX9-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32 ; OPT-GFX9-NEXT: br label [[ENDIF]] @@ -490,8 +490,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 { ; ; OPT-GFX10-LABEL: @test_sink_flat_small_max_flat_offset( ; OPT-GFX10-NEXT: entry: -; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024 -; OPT-GFX10-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095 +; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024 +; OPT-GFX10-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4095 ; OPT-GFX10-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]] ; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0 ; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] @@ -588,8 +588,8 @@ define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 { ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: - %out.gep = getelementptr i32, ptr %out, i32 1024 - %in.gep = getelementptr i8, ptr %in, i64 4095 + %out.gep = getelementptr inbounds i32, ptr %out, i32 1024 + %in.gep = getelementptr inbounds i8, ptr %in, i64 4095 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %cmp0 = icmp eq i32 %tid, 0 br i1 %cmp0, label %endif, label %if @@ -611,8 +611,8 @@ done: define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 { ; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset( ; OPT-NEXT: entry: -; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 99999 -; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4096 +; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i64 99999 +; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 4096 ; OPT-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]] ; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0 ; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] @@ -711,8 +711,8 @@ define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 { ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: - %out.gep = getelementptr i32, ptr %out, i64 99999 - %in.gep = getelementptr i8, ptr %in, i64 4096 + %out.gep = getelementptr inbounds i32, ptr %out, i64 99999 + %in.gep = getelementptr inbounds i8, ptr %in, i64 4096 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %cmp0 = icmp eq i32 %tid, 0 br i1 %cmp0, label %endif, label %if @@ -734,8 +734,8 @@ done: define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 { ; OPT-LABEL: @test_sinkable_flat_reg_offset( ; OPT-NEXT: entry: -; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024 -; OPT-NEXT: [[IN_GEP:%.*]] = 
getelementptr i8, ptr [[IN:%.*]], i64 [[REG:%.*]] +; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr inbounds i32, ptr [[OUT:%.*]], i32 1024 +; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr inbounds i8, ptr [[IN:%.*]], i64 [[REG:%.*]] ; OPT-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3]] ; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0 ; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]] @@ -834,8 +834,8 @@ define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 { ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: - %out.gep = getelementptr i32, ptr %out, i32 1024 - %in.gep = getelementptr i8, ptr %in, i64 %reg + %out.gep = getelementptr inbounds i32, ptr %out, i32 1024 + %in.gep = getelementptr inbounds i8, ptr %in, i64 %reg %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %cmp0 = icmp eq i32 %tid, 0 br i1 %cmp0, label %endif, label %if diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll index 1a4a54b81c78f..119056a72f2c6 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fadd.ll @@ -373,7 +373,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grai ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %result = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 ret float %result } @@ -570,7 +570,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grai ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 -512 + %gep = getelementptr inbounds float, ptr %ptr, i64 -512 %result = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 ret float %result } @@ -995,7 +995,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 ret void } @@ -1219,7 +1219,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_gra ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 -512 + %gep = getelementptr inbounds float, ptr %ptr, i64 -512 %unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 ret void } @@ -1409,7 +1409,7 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %result = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 ret 
float %result } @@ -1630,7 +1630,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_gr ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 ret void } @@ -1795,7 +1795,7 @@ define void @flat_agent_atomic_fadd_noret_f32_maybe_remote(ptr %ptr, float %val) ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst ret void } @@ -1947,7 +1947,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory(pt ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -2165,7 +2165,7 @@ define void @flat_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__a ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 ret void } @@ -2330,7 +2330,7 @@ define void @flat_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode(ptr %p ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.ignore.denormal.mode !0 ret void } @@ -2698,7 +2698,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %result = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret float %result } @@ -2895,7 +2895,7 @@ define float @flat_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 -512 + %gep = getelementptr inbounds float, ptr %ptr, i64 -512 %result = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret float %result } @@ -3320,7 +3320,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, 
!amdgpu.no.fine.grained.memory !0 ret void } @@ -3544,7 +3544,7 @@ define void @flat_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fin ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 -512 + %gep = getelementptr inbounds float, ptr %ptr, i64 -512 %unused = atomicrmw fadd ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -3734,7 +3734,7 @@ define float @flat_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %result = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret float %result } @@ -3955,7 +3955,7 @@ define void @flat_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fi ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -4145,7 +4145,7 @@ define float @flat_agent_atomic_fadd_ret_f32__ieee__amdgpu_no_fine_grained_memor ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %result = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 ret float %result } @@ -4366,7 +4366,7 @@ define void @flat_agent_atomic_fadd_noret_f32__ieee__amdgpu_no_fine_grained_memo ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0 ret void } @@ -6590,7 +6590,7 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_gra ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 255 + %gep = getelementptr inbounds double, ptr %ptr, i64 255 %result = atomicrmw fadd ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %result } @@ -7052,7 +7052,7 @@ define double @flat_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_gra ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 -256 + %gep = getelementptr inbounds double, ptr %ptr, i64 -256 %result = atomicrmw fadd ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %result } @@ -7931,7 +7931,7 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_gra ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 255 + %gep = getelementptr inbounds double, ptr %ptr, i64 255 %unused = atomicrmw fadd ptr %gep, double %val syncscope("agent") seq_cst, 
!amdgpu.no.fine.grained.memory !0 ret void } @@ -8381,7 +8381,7 @@ define void @flat_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_gra ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 -256 + %gep = getelementptr inbounds double, ptr %ptr, i64 -256 %unused = atomicrmw fadd ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -9141,7 +9141,7 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grain ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 1023 + %gep = getelementptr inbounds half, ptr %ptr, i64 1023 %result = atomicrmw fadd ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret half %result } @@ -9526,7 +9526,7 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grain ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 -1024 + %gep = getelementptr inbounds half, ptr %ptr, i64 -1024 %result = atomicrmw fadd ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret half %result } @@ -10256,7 +10256,7 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gra ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 1023 + %gep = getelementptr inbounds half, ptr %ptr, i64 1023 %unused = atomicrmw fadd ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -10628,7 +10628,7 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_gra ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 -1024 + %gep = getelementptr inbounds half, ptr %ptr, i64 -1024 %unused = atomicrmw fadd ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -10902,7 +10902,7 @@ define void @flat_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_ ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 1023 + %gep = getelementptr inbounds half, ptr %ptr, i64 1023 %unused = atomicrmw fadd ptr %gep, half %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void } @@ -11186,7 +11186,7 @@ define half @flat_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fi ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v3 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 1023 + %gep = getelementptr inbounds half, ptr %ptr, i64 1023 %result = atomicrmw fadd ptr %gep, half %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret half %result } @@ -11574,7 +11574,7 @@ define half @flat_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grai ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 1023 + %gep = getelementptr inbounds half, ptr %ptr, i64 1023 %result = atomicrmw fadd ptr %gep, half %val seq_cst, 
!amdgpu.no.fine.grained.memory !0 ret half %result } @@ -11949,7 +11949,7 @@ define void @flat_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_gr ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 1023 + %gep = getelementptr inbounds half, ptr %ptr, i64 1023 %unused = atomicrmw fadd ptr %gep, half %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -12872,7 +12872,7 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_gr ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr bfloat, ptr %ptr, i64 1023 + %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023 %result = atomicrmw fadd ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret bfloat %result } @@ -13340,7 +13340,7 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_gr ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr bfloat, ptr %ptr, i64 -1024 + %gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024 %result = atomicrmw fadd ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret bfloat %result } @@ -13792,7 +13792,7 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_gr ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr bfloat, ptr %ptr, i64 1023 + %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023 %unused = atomicrmw fadd ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -14245,7 +14245,7 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_gr ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr bfloat, ptr %ptr, i64 -1024 + %gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024 %unused = atomicrmw fadd ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -14618,7 +14618,7 @@ define bfloat @flat_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v3 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr bfloat, ptr %ptr, i64 1023 + %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023 %result = atomicrmw fadd ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret bfloat %result } @@ -14979,7 +14979,7 @@ define void @flat_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr bfloat, ptr %ptr, i64 1023 + %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023 %unused = atomicrmw fadd ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void } @@ -15887,7 +15887,7 @@ define bfloat @flat_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_g ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr bfloat, ptr %ptr, i64 1023 + %gep = getelementptr inbounds bfloat, ptr %ptr, i64 
1023 %result = atomicrmw fadd ptr %gep, bfloat %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret bfloat %result } @@ -16343,7 +16343,7 @@ define void @flat_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_g ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr bfloat, ptr %ptr, i64 1023 + %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023 %unused = atomicrmw fadd ptr %gep, bfloat %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -16726,7 +16726,7 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fi ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, ptr %ptr, i64 511 + %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511 %result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret <2 x half> %result } @@ -16930,7 +16930,7 @@ define <2 x half> @flat_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fi ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, ptr %ptr, i64 -512 + %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512 %result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret <2 x half> %result } @@ -17297,7 +17297,7 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_g ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, ptr %ptr, i64 511 + %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -17500,7 +17500,7 @@ define void @flat_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_g ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, ptr %ptr, i64 -512 + %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512 %unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -17694,7 +17694,7 @@ define <2 x half> @flat_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_f ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, ptr %ptr, i64 511 + %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511 %result = atomicrmw fadd ptr %gep, <2 x half> %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret <2 x half> %result } @@ -17884,7 +17884,7 @@ define void @flat_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_ ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, ptr %ptr, i64 511 + %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, <2 x half> %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -19273,7 +19273,7 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x 
bfloat>, ptr %ptr, i64 511 + %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511 %result = atomicrmw fadd ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret <2 x bfloat> %result } @@ -19613,7 +19613,7 @@ define <2 x bfloat> @flat_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512 + %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512 %result = atomicrmw fadd ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret <2 x bfloat> %result } @@ -20246,7 +20246,7 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_ ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511 + %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -20585,7 +20585,7 @@ define void @flat_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_ ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512 + %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512 %unused = atomicrmw fadd ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -20914,7 +20914,7 @@ define <2 x bfloat> @flat_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_n ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511 + %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511 %result = atomicrmw fadd ptr %gep, <2 x bfloat> %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret <2 x bfloat> %result } @@ -21237,7 +21237,7 @@ define void @flat_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511 + %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511 %unused = atomicrmw fadd ptr %gep, <2 x bfloat> %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll index 59b0537b817d2..0c592a2097896 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmax.ll @@ -301,7 +301,7 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_grai ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %result = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret float %result } @@ -468,7 +468,7 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_neg__amdgpu_no_fine_grai ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 -512 + %gep = getelementptr inbounds float, ptr %ptr, 
i64 -512 %result = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret float %result } @@ -757,7 +757,7 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -924,7 +924,7 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_neg__amdgpu_no_fine_gra ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 -512 + %gep = getelementptr inbounds float, ptr %ptr, i64 -512 %unused = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -1076,7 +1076,7 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__amdgpu_no_fine_gra ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %result = atomicrmw fmax ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret float %result } @@ -1227,7 +1227,7 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__amdgpu_no_fine_gr ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fmax ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -1859,7 +1859,7 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fine ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %result = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret float %result } @@ -2026,7 +2026,7 @@ define float @flat_agent_atomic_fmax_ret_f32__offset12b_neg__ftz__amdgpu_no_fine ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 -512 + %gep = getelementptr inbounds float, ptr %ptr, i64 -512 %result = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret float %result } @@ -2315,7 +2315,7 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -2482,7 +2482,7 @@ define void @flat_agent_atomic_fmax_noret_f32__offset12b_neg__ftz__amdgpu_no_fin ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 -512 + %gep = getelementptr inbounds float, ptr %ptr, i64 -512 %unused = atomicrmw fmax ptr %gep, float %val syncscope("agent") seq_cst, 
!amdgpu.no.fine.grained.memory !0 ret void } @@ -2634,7 +2634,7 @@ define float @flat_system_atomic_fmax_ret_f32__offset12b_pos__ftz__amdgpu_no_fin ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %result = atomicrmw fmax ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret float %result } @@ -2785,7 +2785,7 @@ define void @flat_system_atomic_fmax_noret_f32__offset12b_pos__ftz__amdgpu_no_fi ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %ptr, i64 511 + %gep = getelementptr inbounds float, ptr %ptr, i64 511 %unused = atomicrmw fmax ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -3591,7 +3591,7 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_pos__amdgpu_no_fine_gra ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 255 + %gep = getelementptr inbounds double, ptr %ptr, i64 255 %result = atomicrmw fmax ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %result } @@ -4004,7 +4004,7 @@ define double @flat_agent_atomic_fmax_ret_f64__offset12b_neg__amdgpu_no_fine_gra ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 -256 + %gep = getelementptr inbounds double, ptr %ptr, i64 -256 %result = atomicrmw fmax ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret double %result } @@ -4795,7 +4795,7 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_pos__amdgpu_no_fine_gra ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 255 + %gep = getelementptr inbounds double, ptr %ptr, i64 255 %unused = atomicrmw fmax ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -5201,7 +5201,7 @@ define void @flat_agent_atomic_fmax_noret_f64__offset12b_neg__amdgpu_no_fine_gra ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %ptr, i64 -256 + %gep = getelementptr inbounds double, ptr %ptr, i64 -256 %unused = atomicrmw fmax ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -6824,7 +6824,7 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grain ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 1023 + %gep = getelementptr inbounds half, ptr %ptr, i64 1023 %result = atomicrmw fmax ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret half %result } @@ -7234,7 +7234,7 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_neg__amdgpu_no_fine_grain ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 -1024 + %gep = getelementptr inbounds half, ptr %ptr, i64 -1024 %result = atomicrmw fmax ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret half %result } @@ 
-8009,7 +8009,7 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gra ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 1023 + %gep = getelementptr inbounds half, ptr %ptr, i64 1023 %unused = atomicrmw fmax ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -8406,7 +8406,7 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b_neg__amdgpu_no_fine_gra ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 -1024 + %gep = getelementptr inbounds half, ptr %ptr, i64 -1024 %unused = atomicrmw fmax ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -8710,7 +8710,7 @@ define half @flat_agent_atomic_fmax_ret_f16__offset12b_pos__align4__amdgpu_no_fi ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v3 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 1023 + %gep = getelementptr inbounds half, ptr %ptr, i64 1023 %result = atomicrmw fmax ptr %gep, half %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret half %result } @@ -9004,7 +9004,7 @@ define void @flat_agent_atomic_fmax_noret_f16__offset12b__align4_pos__amdgpu_no_ ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 1023 + %gep = getelementptr inbounds half, ptr %ptr, i64 1023 %unused = atomicrmw fmax ptr %gep, half %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0 ret void } @@ -9417,7 +9417,7 @@ define half @flat_system_atomic_fmax_ret_f16__offset12b_pos__amdgpu_no_fine_grai ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 1023 + %gep = getelementptr inbounds half, ptr %ptr, i64 1023 %result = atomicrmw fmax ptr %gep, half %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret half %result } @@ -9817,7 +9817,7 @@ define void @flat_system_atomic_fmax_noret_f16__offset12b_pos__amdgpu_no_fine_gr ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr half, ptr %ptr, i64 1023 + %gep = getelementptr inbounds half, ptr %ptr, i64 1023 %unused = atomicrmw fmax ptr %gep, half %val seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -10742,7 +10742,7 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_gr ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr bfloat, ptr %ptr, i64 1023 + %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023 %result = atomicrmw fmax ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret bfloat %result } @@ -11211,7 +11211,7 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_neg__amdgpu_no_fine_gr ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr bfloat, ptr %ptr, i64 -1024 + %gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024 %result = atomicrmw fmax ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret bfloat 
 %result
 }
@@ -12102,7 +12102,7 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %unused = atomicrmw fmax ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -12556,7 +12556,7 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 -1024
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
 %unused = atomicrmw fmax ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -12930,7 +12930,7 @@ define bfloat @flat_agent_atomic_fmax_ret_bf16__offset12b_pos__align4__amdgpu_no
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v3
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %result = atomicrmw fmax ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
 ret bfloat %result
 }
@@ -13292,7 +13292,7 @@ define void @flat_agent_atomic_fmax_noret_bf16__offset12b__align4_pos__amdgpu_no
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %unused = atomicrmw fmax ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -13764,7 +13764,7 @@ define bfloat @flat_system_atomic_fmax_ret_bf16__offset12b_pos__amdgpu_no_fine_g
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %result = atomicrmw fmax ptr %gep, bfloat %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret bfloat %result
 }
@@ -14221,7 +14221,7 @@ define void @flat_system_atomic_fmax_noret_bf16__offset12b_pos__amdgpu_no_fine_g
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %unused = atomicrmw fmax ptr %gep, bfloat %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -14700,7 +14700,7 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_fi
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
 %result = atomicrmw fmax ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret <2 x half> %result
 }
@@ -14956,7 +14956,7 @@ define <2 x half> @flat_agent_atomic_fmax_ret_v2f16__offset12b_neg__amdgpu_no_fi
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
 %result = atomicrmw fmax ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret <2 x half> %result
 }
@@ -15417,7 +15417,7 @@ define void @flat_agent_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
 %unused = atomicrmw fmax ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -15670,7 +15670,7 @@ define void @flat_agent_atomic_fmax_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
 %unused = atomicrmw fmax ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -15912,7 +15912,7 @@ define <2 x half> @flat_system_atomic_fmax_ret_v2f16__offset12b_pos__amdgpu_no_f
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
 %result = atomicrmw fmax ptr %gep, <2 x half> %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret <2 x half> %result
 }
@@ -16149,7 +16149,7 @@ define void @flat_system_atomic_fmax_noret_v2f16__offset12b_pos__amdgpu_no_fine_
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
 %unused = atomicrmw fmax ptr %gep, <2 x half> %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -17050,7 +17050,7 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_no
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
 %result = atomicrmw fmax ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret <2 x bfloat> %result
 }
@@ -17518,7 +17518,7 @@ define <2 x bfloat> @flat_agent_atomic_fmax_ret_v2bf16__offset12b_neg__amdgpu_no
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
 %result = atomicrmw fmax ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret <2 x bfloat> %result
 }
@@ -18389,7 +18389,7 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
 %unused = atomicrmw fmax ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -18850,7 +18850,7 @@ define void @flat_agent_atomic_fmax_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
 %unused = atomicrmw fmax ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -19304,7 +19304,7 @@ define <2 x bfloat> @flat_system_atomic_fmax_ret_v2bf16__offset12b_pos__amdgpu_n
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
 %result = atomicrmw fmax ptr %gep, <2 x bfloat> %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret <2 x bfloat> %result
 }
@@ -19747,7 +19747,7 @@ define void @flat_system_atomic_fmax_noret_v2bf16__offset12b_pos__amdgpu_no_fine
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
 %unused = atomicrmw fmax ptr %gep, <2 x bfloat> %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll
index c9c9f332fe391..d08fdc9809e19 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fmin.ll
@@ -301,7 +301,7 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_grai
 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: buffer_wbinvl1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %result = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret float %result
 }
@@ -468,7 +468,7 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_neg__amdgpu_no_fine_grai
 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: buffer_wbinvl1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds float, ptr %ptr, i64 -512
 %result = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret float %result
 }
@@ -757,7 +757,7 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gra
 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: buffer_wbinvl1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %unused = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -924,7 +924,7 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_neg__amdgpu_no_fine_gra
 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: buffer_wbinvl1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds float, ptr %ptr, i64 -512
 %unused = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -1076,7 +1076,7 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__amdgpu_no_fine_gra
 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: buffer_wbinvl1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %result = atomicrmw fmin ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret float %result
 }
@@ -1227,7 +1227,7 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__amdgpu_no_fine_gr
 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: buffer_wbinvl1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %unused = atomicrmw fmin ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -1859,7 +1859,7 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fine
 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: buffer_wbinvl1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %result = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret float %result
 }
@@ -2026,7 +2026,7 @@ define float @flat_agent_atomic_fmin_ret_f32__offset12b_neg__ftz__amdgpu_no_fine
 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: buffer_wbinvl1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds float, ptr %ptr, i64 -512
 %result = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret float %result
 }
@@ -2315,7 +2315,7 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fin
 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: buffer_wbinvl1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %unused = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -2482,7 +2482,7 @@ define void @flat_agent_atomic_fmin_noret_f32__offset12b_neg__ftz__amdgpu_no_fin
 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: buffer_wbinvl1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds float, ptr %ptr, i64 -512
 %unused = atomicrmw fmin ptr %gep, float %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -2634,7 +2634,7 @@ define float @flat_system_atomic_fmin_ret_f32__offset12b_pos__ftz__amdgpu_no_fin
 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: buffer_wbinvl1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %result = atomicrmw fmin ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret float %result
 }
@@ -2785,7 +2785,7 @@ define void @flat_system_atomic_fmin_noret_f32__offset12b_pos__ftz__amdgpu_no_fi
 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX7-NEXT: buffer_wbinvl1
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %unused = atomicrmw fmin ptr %gep, float %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -3591,7 +3591,7 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_pos__amdgpu_no_fine_gra
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_waitcnt vmcnt(0)
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr double, ptr %ptr, i64 255
+ %gep = getelementptr inbounds double, ptr %ptr, i64 255
 %result = atomicrmw fmin ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret double %result
 }
@@ -4004,7 +4004,7 @@ define double @flat_agent_atomic_fmin_ret_f64__offset12b_neg__amdgpu_no_fine_gra
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_waitcnt vmcnt(0)
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr double, ptr %ptr, i64 -256
+ %gep = getelementptr inbounds double, ptr %ptr, i64 -256
 %result = atomicrmw fmin ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret double %result
 }
@@ -4795,7 +4795,7 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_pos__amdgpu_no_fine_gra
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_waitcnt vmcnt(0)
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr double, ptr %ptr, i64 255
+ %gep = getelementptr inbounds double, ptr %ptr, i64 255
 %unused = atomicrmw fmin ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -5201,7 +5201,7 @@ define void @flat_agent_atomic_fmin_noret_f64__offset12b_neg__amdgpu_no_fine_gra
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_waitcnt vmcnt(0)
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr double, ptr %ptr, i64 -256
+ %gep = getelementptr inbounds double, ptr %ptr, i64 -256
 %unused = atomicrmw fmin ptr %gep, double %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -6824,7 +6824,7 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grain
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds half, ptr %ptr, i64 1023
 %result = atomicrmw fmin ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret half %result
 }
@@ -7234,7 +7234,7 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_neg__amdgpu_no_fine_grain
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 -1024
+ %gep = getelementptr inbounds half, ptr %ptr, i64 -1024
 %result = atomicrmw fmin ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret half %result
 }
@@ -8009,7 +8009,7 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gra
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds half, ptr %ptr, i64 1023
 %unused = atomicrmw fmin ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -8406,7 +8406,7 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b_neg__amdgpu_no_fine_gra
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 -1024
+ %gep = getelementptr inbounds half, ptr %ptr, i64 -1024
 %unused = atomicrmw fmin ptr %gep, half %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -8710,7 +8710,7 @@ define half @flat_agent_atomic_fmin_ret_f16__offset12b_pos__align4__amdgpu_no_fi
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v3
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds half, ptr %ptr, i64 1023
 %result = atomicrmw fmin ptr %gep, half %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
 ret half %result
 }
@@ -9004,7 +9004,7 @@ define void @flat_agent_atomic_fmin_noret_f16__offset12b__align4_pos__amdgpu_no_
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds half, ptr %ptr, i64 1023
 %unused = atomicrmw fmin ptr %gep, half %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -9417,7 +9417,7 @@ define half @flat_system_atomic_fmin_ret_f16__offset12b_pos__amdgpu_no_fine_grai
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds half, ptr %ptr, i64 1023
 %result = atomicrmw fmin ptr %gep, half %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret half %result
 }
@@ -9817,7 +9817,7 @@ define void @flat_system_atomic_fmin_noret_f16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds half, ptr %ptr, i64 1023
 %unused = atomicrmw fmin ptr %gep, half %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -10742,7 +10742,7 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %result = atomicrmw fmin ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret bfloat %result
 }
@@ -11211,7 +11211,7 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_neg__amdgpu_no_fine_gr
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 -1024
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
 %result = atomicrmw fmin ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret bfloat %result
 }
@@ -12102,7 +12102,7 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_gr
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %unused = atomicrmw fmin ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -12556,7 +12556,7 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b_neg__amdgpu_no_fine_gr
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 -1024
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
 %unused = atomicrmw fmin ptr %gep, bfloat %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -12930,7 +12930,7 @@ define bfloat @flat_agent_atomic_fmin_ret_bf16__offset12b_pos__align4__amdgpu_no
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v3
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %result = atomicrmw fmin ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
 ret bfloat %result
 }
@@ -13292,7 +13292,7 @@ define void @flat_agent_atomic_fmin_noret_bf16__offset12b__align4_pos__amdgpu_no
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %unused = atomicrmw fmin ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -13764,7 +13764,7 @@ define bfloat @flat_system_atomic_fmin_ret_bf16__offset12b_pos__amdgpu_no_fine_g
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %result = atomicrmw fmin ptr %gep, bfloat %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret bfloat %result
 }
@@ -14221,7 +14221,7 @@ define void @flat_system_atomic_fmin_noret_bf16__offset12b_pos__amdgpu_no_fine_g
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %unused = atomicrmw fmin ptr %gep, bfloat %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -14700,7 +14700,7 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_fi
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
 %result = atomicrmw fmin ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret <2 x half> %result
 }
@@ -14956,7 +14956,7 @@ define <2 x half> @flat_agent_atomic_fmin_ret_v2f16__offset12b_neg__amdgpu_no_fi
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
 %result = atomicrmw fmin ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret <2 x half> %result
 }
@@ -15417,7 +15417,7 @@ define void @flat_agent_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_g
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
 %unused = atomicrmw fmin ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -15670,7 +15670,7 @@ define void @flat_agent_atomic_fmin_noret_v2f16__offset12b_neg__amdgpu_no_fine_g
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
 %unused = atomicrmw fmin ptr %gep, <2 x half> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -15912,7 +15912,7 @@ define <2 x half> @flat_system_atomic_fmin_ret_v2f16__offset12b_pos__amdgpu_no_f
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
 %result = atomicrmw fmin ptr %gep, <2 x half> %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret <2 x half> %result
 }
@@ -16149,7 +16149,7 @@ define void @flat_system_atomic_fmin_noret_v2f16__offset12b_pos__amdgpu_no_fine_
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
 %unused = atomicrmw fmin ptr %gep, <2 x half> %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -17050,7 +17050,7 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_no
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
 %result = atomicrmw fmin ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret <2 x bfloat> %result
 }
@@ -17518,7 +17518,7 @@ define <2 x bfloat> @flat_agent_atomic_fmin_ret_v2bf16__offset12b_neg__amdgpu_no
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
 %result = atomicrmw fmin ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret <2 x bfloat> %result
 }
@@ -18389,7 +18389,7 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine_
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
 %unused = atomicrmw fmin ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -18850,7 +18850,7 @@ define void @flat_agent_atomic_fmin_noret_v2bf16__offset12b_neg__amdgpu_no_fine_
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
 %unused = atomicrmw fmin ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -19304,7 +19304,7 @@ define <2 x bfloat> @flat_system_atomic_fmin_ret_v2bf16__offset12b_pos__amdgpu_n
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
 %result = atomicrmw fmin ptr %gep, <2 x bfloat> %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret <2 x bfloat> %result
 }
@@ -19747,7 +19747,7 @@ define void @flat_system_atomic_fmin_noret_v2bf16__offset12b_pos__amdgpu_no_fine
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
 %unused = atomicrmw fmin ptr %gep, <2 x bfloat> %val seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll
index 587c2ea885077..e5c967666c9d7 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-atomicrmw-fsub.ll
@@ -405,7 +405,7 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %val
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %result = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
 ret float %result
 }
@@ -622,7 +622,7 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_neg(ptr %ptr, float %val
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds float, ptr %ptr, i64 -512
 %result = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
 ret float %result
 }
@@ -1001,7 +1001,7 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %va
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %unused = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
 ret void
 }
@@ -1213,7 +1213,7 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_neg(ptr %ptr, float %va
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds float, ptr %ptr, i64 -512
 %unused = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
 ret void
 }
@@ -1416,7 +1416,7 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos(ptr %ptr, float %va
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %result = atomicrmw fsub ptr %gep, float %val seq_cst
 ret float %result
 }
@@ -1612,7 +1612,7 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos(ptr %ptr, float %v
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %unused = atomicrmw fsub ptr %gep, float %val seq_cst
 ret void
 }
@@ -2012,7 +2012,7 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, float
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %result = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
 ret float %result
 }
@@ -2229,7 +2229,7 @@ define float @flat_agent_atomic_fsub_ret_f32__offset12b_neg__ftz(ptr %ptr, float
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds float, ptr %ptr, i64 -512
 %result = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
 ret float %result
 }
@@ -2608,7 +2608,7 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, floa
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %unused = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
 ret void
 }
@@ -2820,7 +2820,7 @@ define void @flat_agent_atomic_fsub_noret_f32__offset12b_neg__ftz(ptr %ptr, floa
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds float, ptr %ptr, i64 -512
 %unused = atomicrmw fsub ptr %gep, float %val syncscope("agent") seq_cst
 ret void
 }
@@ -3023,7 +3023,7 @@ define float @flat_system_atomic_fsub_ret_f32__offset12b_pos__ftz(ptr %ptr, floa
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %result = atomicrmw fsub ptr %gep, float %val seq_cst
 ret float %result
 }
@@ -3219,7 +3219,7 @@ define void @flat_system_atomic_fsub_noret_f32__offset12b_pos__ftz(ptr %ptr, flo
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr float, ptr %ptr, i64 511
+ %gep = getelementptr inbounds float, ptr %ptr, i64 511
 %unused = atomicrmw fsub ptr %gep, float %val seq_cst
 ret void
 }
@@ -4085,7 +4085,7 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_pos(ptr %ptr, double %v
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_waitcnt vmcnt(0)
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr double, ptr %ptr, i64 255
+ %gep = getelementptr inbounds double, ptr %ptr, i64 255
 %result = atomicrmw fsub ptr %gep, double %val syncscope("agent") seq_cst
 ret double %result
 }
@@ -4532,7 +4532,7 @@ define double @flat_agent_atomic_fsub_ret_f64__offset12b_neg(ptr %ptr, double %v
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_waitcnt vmcnt(0)
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr double, ptr %ptr, i64 -256
+ %gep = getelementptr inbounds double, ptr %ptr, i64 -256
 %result = atomicrmw fsub ptr %gep, double %val syncscope("agent") seq_cst
 ret double %result
 }
@@ -5385,7 +5385,7 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_pos(ptr %ptr, double %v
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_waitcnt vmcnt(0)
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr double, ptr %ptr, i64 255
+ %gep = getelementptr inbounds double, ptr %ptr, i64 255
 %unused = atomicrmw fsub ptr %gep, double %val syncscope("agent") seq_cst
 ret void
 }
@@ -5822,7 +5822,7 @@ define void @flat_agent_atomic_fsub_noret_f64__offset12b_neg(ptr %ptr, double %v
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_waitcnt vmcnt(0)
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr double, ptr %ptr, i64 -256
+ %gep = getelementptr inbounds double, ptr %ptr, i64 -256
 %unused = atomicrmw fsub ptr %gep, double %val syncscope("agent") seq_cst
 ret void
 }
@@ -6582,7 +6582,7 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds half, ptr %ptr, i64 1023
 %result = atomicrmw fsub ptr %gep, half %val syncscope("agent") seq_cst
 ret half %result
 }
@@ -6967,7 +6967,7 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_neg(ptr %ptr, half %val)
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 -1024
+ %gep = getelementptr inbounds half, ptr %ptr, i64 -1024
 %result = atomicrmw fsub ptr %gep, half %val syncscope("agent") seq_cst
 ret half %result
 }
@@ -7697,7 +7697,7 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %val
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds half, ptr %ptr, i64 1023
 %unused = atomicrmw fsub ptr %gep, half %val syncscope("agent") seq_cst
 ret void
 }
@@ -8069,7 +8069,7 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b_neg(ptr %ptr, half %val
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 -1024
+ %gep = getelementptr inbounds half, ptr %ptr, i64 -1024
 %unused = atomicrmw fsub ptr %gep, half %val syncscope("agent") seq_cst
 ret void
 }
@@ -8353,7 +8353,7 @@ define half @flat_agent_atomic_fsub_ret_f16__offset12b_pos__align4(ptr %ptr, hal
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v3
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds half, ptr %ptr, i64 1023
 %result = atomicrmw fsub ptr %gep, half %val syncscope("agent") seq_cst, align 4
 ret half %result
 }
@@ -8627,7 +8627,7 @@ define void @flat_agent_atomic_fsub_noret_f16__offset12b__align4_pos(ptr %ptr, h
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds half, ptr %ptr, i64 1023
 %unused = atomicrmw fsub ptr %gep, half %val syncscope("agent") seq_cst, align 4
 ret void
 }
@@ -9015,7 +9015,7 @@ define half @flat_system_atomic_fsub_ret_f16__offset12b_pos(ptr %ptr, half %val)
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v2, v5
 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds half, ptr %ptr, i64 1023
 %result = atomicrmw fsub ptr %gep, half %val seq_cst
 ret half %result
 }
@@ -9390,7 +9390,7 @@ define void @flat_system_atomic_fsub_noret_f16__offset12b_pos(ptr %ptr, half %va
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr half, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds half, ptr %ptr, i64 1023
 %unused = atomicrmw fsub ptr %gep, half %val seq_cst
 ret void
 }
@@ -10313,7 +10313,7 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat %
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %result = atomicrmw fsub ptr %gep, bfloat %val syncscope("agent") seq_cst
 ret bfloat %result
 }
@@ -10781,7 +10781,7 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_neg(ptr %ptr, bfloat %
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 -1024
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
 %result = atomicrmw fsub ptr %gep, bfloat %val syncscope("agent") seq_cst
 ret bfloat %result
 }
@@ -11670,7 +11670,7 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat %
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %unused = atomicrmw fsub ptr %gep, bfloat %val syncscope("agent") seq_cst
 ret void
 }
@@ -12123,7 +12123,7 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b_neg(ptr %ptr, bfloat %
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 -1024
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 -1024
 %unused = atomicrmw fsub ptr %gep, bfloat %val syncscope("agent") seq_cst
 ret void
 }
@@ -12496,7 +12496,7 @@ define bfloat @flat_agent_atomic_fsub_ret_bf16__offset12b_pos__align4(ptr %ptr,
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v3
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %result = atomicrmw fsub ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4
 ret bfloat %result
 }
@@ -12857,7 +12857,7 @@ define void @flat_agent_atomic_fsub_noret_bf16__offset12b__align4_pos(ptr %ptr,
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %unused = atomicrmw fsub ptr %gep, bfloat %val syncscope("agent") seq_cst, align 4
 ret void
 }
@@ -13328,7 +13328,7 @@ define bfloat @flat_system_atomic_fsub_ret_bf16__offset12b_pos(ptr %ptr, bfloat
 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, v3, v5
 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %result = atomicrmw fsub ptr %gep, bfloat %val seq_cst
 ret bfloat %result
 }
@@ -13784,7 +13784,7 @@ define void @flat_system_atomic_fsub_noret_bf16__offset12b_pos(ptr %ptr, bfloat
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr bfloat, ptr %ptr, i64 1023
+ %gep = getelementptr inbounds bfloat, ptr %ptr, i64 1023
 %unused = atomicrmw fsub ptr %gep, bfloat %val seq_cst
 ret void
 }
@@ -14229,7 +14229,7 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
 %result = atomicrmw fsub ptr %gep, <2 x half> %val syncscope("agent") seq_cst
 ret <2 x half> %result
 }
@@ -14468,7 +14468,7 @@ define <2 x half> @flat_agent_atomic_fsub_ret_v2f16__offset12b_neg(ptr %ptr, <2
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
 %result = atomicrmw fsub ptr %gep, <2 x half> %val syncscope("agent") seq_cst
 ret <2 x half> %result
 }
@@ -14891,7 +14891,7 @@ define void @flat_agent_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x ha
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
 %unused = atomicrmw fsub ptr %gep, <2 x half> %val syncscope("agent") seq_cst
 ret void
 }
@@ -15125,7 +15125,7 @@ define void @flat_agent_atomic_fsub_noret_v2f16__offset12b_neg(ptr %ptr, <2 x ha
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 -512
 %unused = atomicrmw fsub ptr %gep, <2 x half> %val syncscope("agent") seq_cst
 ret void
 }
@@ -15350,7 +15350,7 @@ define <2 x half> @flat_system_atomic_fsub_ret_v2f16__offset12b_pos(ptr %ptr, <2
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
 %result = atomicrmw fsub ptr %gep, <2 x half> %val seq_cst
 ret <2 x half> %result
 }
@@ -15568,7 +15568,7 @@ define void @flat_system_atomic_fsub_noret_v2f16__offset12b_pos(ptr %ptr, <2 x h
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x half>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x half>, ptr %ptr, i64 511
 %unused = atomicrmw fsub ptr %gep, <2 x half> %val seq_cst
 ret void
 }
@@ -16469,7 +16469,7 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
 %result = atomicrmw fsub ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst
 ret <2 x bfloat> %result
 }
@@ -16937,7 +16937,7 @@ define <2 x bfloat> @flat_agent_atomic_fsub_ret_v2bf16__offset12b_neg(ptr %ptr,
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
 %result = atomicrmw fsub ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst
 ret <2 x bfloat> %result
 }
@@ -17808,7 +17808,7 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x b
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
 %unused = atomicrmw fsub ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst
 ret void
 }
@@ -18269,7 +18269,7 @@ define void @flat_agent_atomic_fsub_noret_v2bf16__offset12b_neg(ptr %ptr, <2 x b
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 -512
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 -512
 %unused = atomicrmw fsub ptr %gep, <2 x bfloat> %val syncscope("agent") seq_cst
 ret void
 }
@@ -18723,7 +18723,7 @@ define <2 x bfloat> @flat_system_atomic_fsub_ret_v2bf16__offset12b_pos(ptr %ptr,
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
 %result = atomicrmw fsub ptr %gep, <2 x bfloat> %val seq_cst
 ret <2 x bfloat> %result
 }
@@ -19166,7 +19166,7 @@ define void @flat_system_atomic_fsub_noret_v2bf16__offset12b_pos(ptr %ptr, <2 x
 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
 ; GFX7-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr <2 x bfloat>, ptr %ptr, i64 511
+ %gep = getelementptr inbounds <2 x bfloat>, ptr %ptr, i64 511
 %unused = atomicrmw fsub ptr %gep, <2 x bfloat> %val seq_cst
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
index 57be2907da4a0..b35f07002a48a 100644
--- a/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat_atomics.ll
@@ -64,7 +64,7 @@ define amdgpu_kernel void @atomic_add_i32_offset(ptr %out, i32 %in) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw add ptr %gep, i32 %in syncscope("agent") seq_cst
 ret void
 }
@@ -128,7 +128,7 @@ define amdgpu_kernel void @atomic_add_i32_max_offset(ptr %out, i32 %in) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 1023
+ %gep = getelementptr inbounds i32, ptr %out, i32 1023
 %val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
 ret void
 }
@@ -196,7 +196,7 @@ define amdgpu_kernel void @atomic_add_i32_max_offset_p1(ptr %out, i32 %in) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 1024
+ %gep = getelementptr inbounds i32, ptr %out, i32 1024
 %val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
 ret void
 }
@@ -270,7 +270,7 @@ define amdgpu_kernel void @atomic_add_i32_ret_offset(ptr %out, ptr %out2, i32 %i
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
 store i32 %val, ptr %out2
 ret void
@@ -352,8 +352,8 @@ define amdgpu_kernel void @atomic_add_i32_addr64_offset(ptr %out, i32 %in, i64 %
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
 ret void
 }
@@ -444,8 +444,8 @@ define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile add ptr %gep, i32 %in syncscope("agent") seq_cst
 store i32 %val, ptr %out2
 ret void
@@ -652,7 +652,7 @@ define amdgpu_kernel void @atomic_add_i32_addr64(ptr %out, i32 %in, i64 %index)
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile add ptr %ptr, i32 %in syncscope("agent") seq_cst
 ret void
 }
@@ -739,7 +739,7 @@ define amdgpu_kernel void @atomic_add_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile add ptr %ptr, i32 %in syncscope("agent") seq_cst
 store i32 %val, ptr %out2
 ret void
@@ -804,7 +804,7 @@ define amdgpu_kernel void @atomic_and_i32_offset(ptr %out, i32 %in) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile and ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -878,7 +878,7 @@ define amdgpu_kernel void @atomic_and_i32_ret_offset(ptr %out, ptr %out2, i32 %i
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile and ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -960,8 +960,8 @@ define amdgpu_kernel void @atomic_and_i32_addr64_offset(ptr %out, i32 %in, i64 %
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile and ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -1052,8 +1052,8 @@ define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile and ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -1260,7 +1260,7 @@ define amdgpu_kernel void @atomic_and_i32_addr64(ptr %out, i32 %in, i64 %index)
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile and ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -1347,7 +1347,7 @@ define amdgpu_kernel void @atomic_and_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile and ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -1412,7 +1412,7 @@ define amdgpu_kernel void @atomic_sub_i32_offset(ptr %out, i32 %in) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile sub ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -1486,7 +1486,7 @@ define amdgpu_kernel void @atomic_sub_i32_ret_offset(ptr %out, ptr %out2, i32 %i
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile sub ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -1568,8 +1568,8 @@ define amdgpu_kernel void @atomic_sub_i32_addr64_offset(ptr %out, i32 %in, i64 %
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile sub ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -1660,8 +1660,8 @@ define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile sub ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -1868,7 +1868,7 @@ define amdgpu_kernel void @atomic_sub_i32_addr64(ptr %out, i32 %in, i64 %index)
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile sub ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -1955,7 +1955,7 @@ define amdgpu_kernel void @atomic_sub_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile sub ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -2016,7 +2016,7 @@ define amdgpu_kernel void @atomic_max_i32_offset(ptr %out, i32 %in) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile max ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -2089,7 +2089,7 @@ define amdgpu_kernel void @atomic_max_i32_ret_offset(ptr %out, ptr %out2, i32 %i
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile max ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -2167,8 +2167,8 @@ define amdgpu_kernel void @atomic_max_i32_addr64_offset(ptr %out, i32 %in, i64 %
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile max ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -2258,8 +2258,8 @@ define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile max ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -2457,7 +2457,7 @@ define amdgpu_kernel void @atomic_max_i32_addr64(ptr %out, i32 %in, i64 %index)
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile max ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -2543,7 +2543,7 @@ define amdgpu_kernel void @atomic_max_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile max ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -2604,7 +2604,7 @@ define amdgpu_kernel void @atomic_umax_i32_offset(ptr %out, i32 %in) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile umax ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -2677,7 +2677,7 @@ define amdgpu_kernel void @atomic_umax_i32_ret_offset(ptr %out, ptr %out2, i32 %
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile umax ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -2755,8 +2755,8 @@ define amdgpu_kernel void @atomic_umax_i32_addr64_offset(ptr %out, i32 %in, i64
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile umax ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -2846,8 +2846,8 @@ define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(ptr %out, ptr %out2
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile umax ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -3045,7 +3045,7 @@ define amdgpu_kernel void @atomic_umax_i32_addr64(ptr %out, i32 %in, i64 %index)
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile umax ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -3131,7 +3131,7 @@ define amdgpu_kernel void @atomic_umax_i32_ret_addr64(ptr %out, ptr %out2, i32 %
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile umax ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -3192,7 +3192,7 @@ define amdgpu_kernel void @atomic_min_i32_offset(ptr %out, i32 %in) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile min ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -3265,7 +3265,7 @@ define amdgpu_kernel void @atomic_min_i32_ret_offset(ptr %out, ptr %out2, i32 %i
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile min ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -3343,8 +3343,8 @@ define amdgpu_kernel void @atomic_min_i32_addr64_offset(ptr %out, i32 %in, i64 %
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile min ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -3434,8 +3434,8 @@ define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile min ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -3633,7 +3633,7 @@ define amdgpu_kernel void @atomic_min_i32_addr64(ptr %out, i32 %in, i64 %index)
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile min ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -3719,7 +3719,7 @@ define amdgpu_kernel void @atomic_min_i32_ret_addr64(ptr %out, ptr %out2, i32 %i
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile min ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -3780,7 +3780,7 @@ define amdgpu_kernel void @atomic_umin_i32_offset(ptr %out, i32 %in) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile umin ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -3853,7 +3853,7 @@ define amdgpu_kernel void @atomic_umin_i32_ret_offset(ptr %out, ptr %out2, i32 %
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile umin ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -3931,8 +3931,8 @@ define amdgpu_kernel void @atomic_umin_i32_addr64_offset(ptr %out, i32 %in, i64
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile umin ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -4022,8 +4022,8 @@ define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(ptr %out, ptr %out2
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile umin ptr %gep, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -4221,7 +4221,7 @@ define amdgpu_kernel void @atomic_umin_i32_addr64(ptr %out, i32 %in, i64 %index)
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile umin ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -4307,7 +4307,7 @@ define amdgpu_kernel void @atomic_umin_i32_ret_addr64(ptr %out, ptr %out2, i32 %
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile umin ptr %ptr, i32 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -4372,7 +4372,7 @@ define amdgpu_kernel void @atomic_or_i32_offset(ptr %out, i32 %in) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile or ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -4446,7 +4446,7 @@ define amdgpu_kernel void @atomic_or_i32_ret_offset(ptr %out, ptr %out2, i32 %in
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile or ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -4528,8 +4528,8 @@ define amdgpu_kernel void @atomic_or_i32_addr64_offset(ptr %out, i32 %in, i64 %i
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile or ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -4620,8 +4620,8 @@ define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(ptr %out, ptr %out2,
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile or ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -4828,7 +4828,7 @@ define amdgpu_kernel void @atomic_or_i32_addr64(ptr %out, i32 %in, i64 %index) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile or ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 ret void
 }
@@ -4915,7 +4915,7 @@ define amdgpu_kernel void @atomic_or_i32_ret_addr64(ptr %out, ptr %out2, i32 %in
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile or ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0
 store i32 %val, ptr %out2
 ret void
@@ -4980,7 +4980,7 @@ define amdgpu_kernel void @atomic_xchg_i32_offset(ptr %out, i32 %in) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile xchg ptr %gep, i32 %in syncscope("agent") seq_cst
 ret void
 }
@@ -5044,7 +5044,7 @@ define amdgpu_kernel void @atomic_xchg_f32_offset(ptr %out, float %in) {
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr float, ptr %out, i32 4
+ %gep = getelementptr inbounds float, ptr %out, i32 4
 %val = atomicrmw volatile xchg ptr %gep, float %in syncscope("agent") seq_cst
 ret void
 }
@@ -5118,7 +5118,7 @@ define amdgpu_kernel void @atomic_xchg_i32_ret_offset(ptr %out, ptr %out2, i32 %
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = atomicrmw volatile xchg ptr %gep, i32 %in syncscope("agent") seq_cst
 store i32 %val, ptr %out2
 ret void
@@ -5200,8 +5200,8 @@ define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(ptr %out, i32 %in, i64
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile xchg ptr %gep, i32 %in syncscope("agent") seq_cst
 ret void
 }
@@ -5292,8 +5292,8 @@ define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(ptr %out, ptr %out2
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
- %gep = getelementptr i32, ptr %ptr, i32 4
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
+ %gep = getelementptr inbounds i32, ptr %ptr, i32 4
 %val = atomicrmw volatile xchg ptr %gep, i32 %in syncscope("agent") seq_cst
 store i32 %val, ptr %out2
 ret void
@@ -5500,7 +5500,7 @@ define amdgpu_kernel void @atomic_xchg_i32_addr64(ptr %out, i32 %in, i64 %index)
 ; GFX11-NEXT: buffer_gl0_inv
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile xchg ptr %ptr, i32 %in syncscope("agent") seq_cst
 ret void
 }
@@ -5587,7 +5587,7 @@ define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(ptr %out, ptr %out2, i32 %
 ; GFX11-NEXT: flat_store_b32 v[0:1], v2
 ; GFX11-NEXT: s_endpgm
 entry:
- %ptr = getelementptr i32, ptr %out, i64 %index
+ %ptr = getelementptr inbounds i32, ptr %out, i64 %index
 %val = atomicrmw volatile xchg ptr %ptr, i32 %in syncscope("agent") seq_cst
store i32 %val, ptr %out2 ret void @@ -5652,7 +5652,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_offset(ptr %out, i32 %in, i32 %old ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -5729,7 +5729,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(ptr %out, ptr %out2, i3 ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst %flag = extractvalue { i32, i1 } %val, 0 store i32 %flag, ptr %out2 @@ -5819,8 +5819,8 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(ptr %out, i32 %in, i ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i64 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst ret void } @@ -5918,8 +5918,8 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(ptr %out, ptr %o ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i64 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst %flag = extractvalue { i32, i1 } %val, 0 store i32 %flag, ptr %out2 @@ -6136,7 +6136,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(ptr %out, i32 %in, i64 %ind ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index + %ptr = getelementptr inbounds i32, ptr %out, i64 %index %val = cmpxchg volatile ptr %ptr, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst ret void } @@ -6230,7 +6230,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(ptr %out, ptr %out2, i3 ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index + %ptr = getelementptr inbounds i32, ptr %out, i64 %index %val = cmpxchg volatile ptr %ptr, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst %flag = extractvalue { i32, i1 } %val, 0 store i32 %flag, ptr %out2 @@ -6296,7 +6296,7 @@ define amdgpu_kernel void @atomic_xor_i32_offset(ptr %out, i32 %in) { ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = atomicrmw volatile xor ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -6370,7 +6370,7 @@ define amdgpu_kernel void @atomic_xor_i32_ret_offset(ptr %out, ptr %out2, i32 %i ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = atomicrmw volatile xor ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i32 %val, ptr %out2 ret void @@ -6452,8 +6452,8 @@ define amdgpu_kernel void @atomic_xor_i32_addr64_offset(ptr %out, i32 %in, i64 % ; GFX11-NEXT: 
buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i64 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %val = atomicrmw volatile xor ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -6544,8 +6544,8 @@ define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(ptr %out, ptr %out2, ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i64 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %val = atomicrmw volatile xor ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i32 %val, ptr %out2 ret void @@ -6752,7 +6752,7 @@ define amdgpu_kernel void @atomic_xor_i32_addr64(ptr %out, i32 %in, i64 %index) ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index + %ptr = getelementptr inbounds i32, ptr %out, i64 %index %val = atomicrmw volatile xor ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -6839,7 +6839,7 @@ define amdgpu_kernel void @atomic_xor_i32_ret_addr64(ptr %out, ptr %out2, i32 %i ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index + %ptr = getelementptr inbounds i32, ptr %out, i64 %index %val = atomicrmw volatile xor ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i32 %val, ptr %out2 ret void @@ -6905,7 +6905,7 @@ define amdgpu_kernel void @atomic_load_i32_offset(ptr %in, ptr %out) { ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %in, i32 4 + %gep = getelementptr inbounds i32, ptr %in, i32 4 %val = load atomic i32, ptr %gep seq_cst, align 4 store i32 %val, ptr %out ret void @@ -7050,8 +7050,8 @@ define amdgpu_kernel void @atomic_load_i32_addr64_offset(ptr %in, ptr %out, i64 ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %in, i64 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %in, i64 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %val = load atomic i32, ptr %gep seq_cst, align 4 store i32 %val, ptr %out ret void @@ -7131,7 +7131,7 @@ define amdgpu_kernel void @atomic_load_i32_addr64(ptr %in, ptr %out, i64 %index) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %in, i64 %index + %ptr = getelementptr inbounds i32, ptr %in, i64 %index %val = load atomic i32, ptr %ptr seq_cst, align 4 store i32 %val, ptr %out ret void @@ -7186,7 +7186,7 @@ define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, ptr %out) { ; GFX11-NEXT: flat_store_b32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 store atomic i32 %in, ptr %gep seq_cst, align 4 ret void } @@ -7302,8 +7302,8 @@ define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, ptr %out, i64 ; GFX11-NEXT: flat_store_b32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i64 %index + %gep = getelementptr 
inbounds i32, ptr %ptr, i32 4 store atomic i32 %in, ptr %gep seq_cst, align 4 ret void } @@ -7366,7 +7366,7 @@ define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, ptr %out, i64 %index ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index + %ptr = getelementptr inbounds i32, ptr %out, i64 %index store atomic i32 %in, ptr %ptr seq_cst, align 4 ret void } @@ -7431,7 +7431,7 @@ define amdgpu_kernel void @atomic_load_f32_offset(ptr %in, ptr %out) { ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr float, ptr %in, i32 4 + %gep = getelementptr inbounds float, ptr %in, i32 4 %val = load atomic float, ptr %gep seq_cst, align 4 store float %val, ptr %out ret void @@ -7576,8 +7576,8 @@ define amdgpu_kernel void @atomic_load_f32_addr64_offset(ptr %in, ptr %out, i64 ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr float, ptr %in, i64 %index - %gep = getelementptr float, ptr %ptr, i32 4 + %ptr = getelementptr inbounds float, ptr %in, i64 %index + %gep = getelementptr inbounds float, ptr %ptr, i32 4 %val = load atomic float, ptr %gep seq_cst, align 4 store float %val, ptr %out ret void @@ -7657,7 +7657,7 @@ define amdgpu_kernel void @atomic_load_f32_addr64(ptr %in, ptr %out, i64 %index) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr float, ptr %in, i64 %index + %ptr = getelementptr inbounds float, ptr %in, i64 %index %val = load atomic float, ptr %ptr seq_cst, align 4 store float %val, ptr %out ret void @@ -7712,7 +7712,7 @@ define amdgpu_kernel void @atomic_store_f32_offset(float %in, ptr %out) { ; GFX11-NEXT: flat_store_b32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr float, ptr %out, i32 4 + %gep = getelementptr inbounds float, ptr %out, i32 4 store atomic float %in, ptr %gep seq_cst, align 4 ret void } @@ -7828,8 +7828,8 @@ define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, ptr %out, i ; GFX11-NEXT: flat_store_b32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr float, ptr %out, i64 %index - %gep = getelementptr float, ptr %ptr, i32 4 + %ptr = getelementptr inbounds float, ptr %out, i64 %index + %gep = getelementptr inbounds float, ptr %ptr, i32 4 store atomic float %in, ptr %gep seq_cst, align 4 ret void } @@ -7892,7 +7892,7 @@ define amdgpu_kernel void @atomic_store_f32_addr64(float %in, ptr %out, i64 %ind ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr float, ptr %out, i64 %index + %ptr = getelementptr inbounds float, ptr %out, i64 %index store atomic float %in, ptr %ptr seq_cst, align 4 ret void } @@ -7971,7 +7971,7 @@ define amdgpu_kernel void @atomic_load_i8_offset(ptr %in, ptr %out) { ; GFX11-FAKE16-NEXT: flat_store_b8 v[0:1], v2 ; GFX11-FAKE16-NEXT: s_endpgm entry: - %gep = getelementptr i8, ptr %in, i64 16 + %gep = getelementptr inbounds i8, ptr %in, i64 16 %val = load atomic i8, ptr %gep seq_cst, align 1 store i8 %val, ptr %out ret void @@ -8145,8 +8145,8 @@ define amdgpu_kernel void @atomic_load_i8_addr64_offset(ptr %in, ptr %out, i64 % ; GFX11-FAKE16-NEXT: flat_store_b8 v[0:1], v2 ; GFX11-FAKE16-NEXT: s_endpgm entry: - %ptr = getelementptr i8, ptr %in, i64 %index - %gep = getelementptr i8, ptr %ptr, i64 16 + %ptr = getelementptr inbounds i8, ptr %in, i64 %index + %gep = getelementptr inbounds i8, ptr %ptr, i64 16 %val = load atomic i8, ptr %gep seq_cst, align 1 store i8 
%val, ptr %out ret void @@ -8212,7 +8212,7 @@ define amdgpu_kernel void @atomic_store_i8_offset(i8 %in, ptr %out) { ; GFX11-FAKE16-NEXT: flat_store_b8 v[0:1], v2 offset:16 ; GFX11-FAKE16-NEXT: s_endpgm entry: - %gep = getelementptr i8, ptr %out, i64 16 + %gep = getelementptr inbounds i8, ptr %out, i64 16 store atomic i8 %in, ptr %gep seq_cst, align 1 ret void } @@ -8348,8 +8348,8 @@ define amdgpu_kernel void @atomic_store_i8_addr64_offset(i8 %in, ptr %out, i64 % ; GFX11-FAKE16-NEXT: flat_store_b8 v[0:1], v2 offset:16 ; GFX11-FAKE16-NEXT: s_endpgm entry: - %ptr = getelementptr i8, ptr %out, i64 %index - %gep = getelementptr i8, ptr %ptr, i64 16 + %ptr = getelementptr inbounds i8, ptr %out, i64 %index + %gep = getelementptr inbounds i8, ptr %ptr, i64 16 store atomic i8 %in, ptr %gep seq_cst, align 1 ret void } @@ -8428,7 +8428,7 @@ define amdgpu_kernel void @atomic_load_i16_offset(ptr %in, ptr %out) { ; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2 ; GFX11-FAKE16-NEXT: s_endpgm entry: - %gep = getelementptr i16, ptr %in, i64 8 + %gep = getelementptr inbounds i16, ptr %in, i64 8 %val = load atomic i16, ptr %gep seq_cst, align 2 store i16 %val, ptr %out ret void @@ -8607,8 +8607,8 @@ define amdgpu_kernel void @atomic_load_i16_addr64_offset(ptr %in, ptr %out, i64 ; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2 ; GFX11-FAKE16-NEXT: s_endpgm entry: - %ptr = getelementptr i16, ptr %in, i64 %index - %gep = getelementptr i16, ptr %ptr, i64 8 + %ptr = getelementptr inbounds i16, ptr %in, i64 %index + %gep = getelementptr inbounds i16, ptr %ptr, i64 8 %val = load atomic i16, ptr %gep seq_cst, align 2 store i16 %val, ptr %out ret void @@ -8674,7 +8674,7 @@ define amdgpu_kernel void @atomic_store_i16_offset(i16 %in, ptr %out) { ; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2 offset:16 ; GFX11-FAKE16-NEXT: s_endpgm entry: - %gep = getelementptr i16, ptr %out, i64 8 + %gep = getelementptr inbounds i16, ptr %out, i64 8 store atomic i16 %in, ptr %gep seq_cst, align 2 ret void } @@ -8816,8 +8816,8 @@ define amdgpu_kernel void @atomic_store_i16_addr64_offset(i16 %in, ptr %out, i64 ; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2 offset:16 ; GFX11-FAKE16-NEXT: s_endpgm entry: - %ptr = getelementptr i16, ptr %out, i64 %index - %gep = getelementptr i16, ptr %ptr, i64 8 + %ptr = getelementptr inbounds i16, ptr %out, i64 %index + %gep = getelementptr inbounds i16, ptr %ptr, i64 8 store atomic i16 %in, ptr %gep seq_cst, align 2 ret void } @@ -8882,7 +8882,7 @@ define amdgpu_kernel void @atomic_store_f16_offset(half %in, ptr %out) { ; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2 offset:16 ; GFX11-FAKE16-NEXT: s_endpgm entry: - %gep = getelementptr half, ptr %out, i64 8 + %gep = getelementptr inbounds half, ptr %out, i64 8 store atomic half %in, ptr %gep seq_cst, align 2 ret void } @@ -9002,7 +9002,7 @@ define amdgpu_kernel void @atomic_store_bf16_offset(bfloat %in, ptr %out) { ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2 ; GFX11-FAKE16-NEXT: s_endpgm - %gep = getelementptr bfloat, ptr %out, i64 8 + %gep = getelementptr inbounds bfloat, ptr %out, i64 8 store atomic bfloat %in, ptr %out seq_cst, align 2 ret void } @@ -9125,7 +9125,7 @@ define amdgpu_kernel void @atomic_inc_i32_offset(ptr %out, i32 %in) { ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void 
} @@ -9189,7 +9189,7 @@ define amdgpu_kernel void @atomic_inc_i32_max_offset(ptr %out, i32 %in) { ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 1023 + %gep = getelementptr inbounds i32, ptr %out, i32 1023 %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -9257,7 +9257,7 @@ define amdgpu_kernel void @atomic_inc_i32_max_offset_p1(ptr %out, i32 %in) { ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 1024 + %gep = getelementptr inbounds i32, ptr %out, i32 1024 %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -9331,7 +9331,7 @@ define amdgpu_kernel void @atomic_inc_i32_ret_offset(ptr %out, ptr %out2, i32 %i ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i32 %val, ptr %out2 ret void @@ -9413,8 +9413,8 @@ define amdgpu_kernel void @atomic_inc_i32_incr64_offset(ptr %out, i32 %in, i64 % ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i64 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -9505,8 +9505,8 @@ define amdgpu_kernel void @atomic_inc_i32_ret_incr64_offset(ptr %out, ptr %out2, ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i64 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %val = atomicrmw volatile uinc_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i32 %val, ptr %out2 ret void @@ -9713,7 +9713,7 @@ define amdgpu_kernel void @atomic_inc_i32_incr64(ptr %out, i32 %in, i64 %index) ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index + %ptr = getelementptr inbounds i32, ptr %out, i64 %index %val = atomicrmw volatile uinc_wrap ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -9800,7 +9800,7 @@ define amdgpu_kernel void @atomic_inc_i32_ret_incr64(ptr %out, ptr %out2, i32 %i ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index + %ptr = getelementptr inbounds i32, ptr %out, i64 %index %val = atomicrmw volatile uinc_wrap ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i32 %val, ptr %out2 ret void @@ -9865,7 +9865,7 @@ define amdgpu_kernel void @atomic_dec_i32_offset(ptr %out, i32 %in) { ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -9929,7 +9929,7 @@ define amdgpu_kernel void @atomic_dec_i32_max_offset(ptr %out, i32 %in) { ; GFX11-NEXT: buffer_gl0_inv ; 
GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 1023 + %gep = getelementptr inbounds i32, ptr %out, i32 1023 %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -9997,7 +9997,7 @@ define amdgpu_kernel void @atomic_dec_i32_max_offset_p1(ptr %out, i32 %in) { ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 1024 + %gep = getelementptr inbounds i32, ptr %out, i32 1024 %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -10071,7 +10071,7 @@ define amdgpu_kernel void @atomic_dec_i32_ret_offset(ptr %out, ptr %out2, i32 %i ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i32 %val, ptr %out2 ret void @@ -10153,8 +10153,8 @@ define amdgpu_kernel void @atomic_dec_i32_decr64_offset(ptr %out, i32 %in, i64 % ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i64 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -10245,8 +10245,8 @@ define amdgpu_kernel void @atomic_dec_i32_ret_decr64_offset(ptr %out, ptr %out2, ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i64 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %val = atomicrmw volatile udec_wrap ptr %gep, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i32 %val, ptr %out2 ret void @@ -10453,7 +10453,7 @@ define amdgpu_kernel void @atomic_dec_i32_decr64(ptr %out, i32 %in, i64 %index) ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index + %ptr = getelementptr inbounds i32, ptr %out, i64 %index %val = atomicrmw volatile udec_wrap ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -10540,7 +10540,7 @@ define amdgpu_kernel void @atomic_dec_i32_ret_decr64(ptr %out, ptr %out2, i32 %i ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i64 %index + %ptr = getelementptr inbounds i32, ptr %out, i64 %index %val = atomicrmw volatile udec_wrap ptr %ptr, i32 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i32 %val, ptr %out2 ret void @@ -10619,7 +10619,7 @@ define amdgpu_kernel void @atomic_load_f16_offset(ptr %in, ptr %out) { ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], v2 ; GFX11-FAKE16-NEXT: s_endpgm - %gep = getelementptr half, ptr %in, i64 8 + %gep = getelementptr inbounds half, ptr %in, i64 8 %val = load atomic half, ptr %gep seq_cst, align 2 store half %val, ptr %out ret void @@ -10772,7 +10772,7 @@ define amdgpu_kernel void @atomic_load_bf16_offset(ptr %in, ptr %out) { ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-FAKE16-NEXT: flat_store_b16 v[0:1], 
v2 ; GFX11-FAKE16-NEXT: s_endpgm - %gep = getelementptr bfloat, ptr %in, i64 8 + %gep = getelementptr inbounds bfloat, ptr %in, i64 8 %val = load atomic bfloat, ptr %gep seq_cst, align 2 store bfloat %val, ptr %out ret void diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll index 47161954cc332..45f9d9e774079 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i32_system.ll @@ -63,7 +63,7 @@ define void @flat_atomic_xchg_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw xchg ptr %gep, i32 %in seq_cst ret void } @@ -124,7 +124,7 @@ define i32 @flat_atomic_xchg_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw xchg ptr %gep, i32 %in seq_cst ret i32 %result } @@ -203,7 +203,7 @@ define amdgpu_gfx void @flat_atomic_xchg_i32_noret_offset_scalar(ptr inreg %out, ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw xchg ptr %gep, i32 %in seq_cst ret void } @@ -282,7 +282,7 @@ define amdgpu_gfx i32 @flat_atomic_xchg_i32_ret_offset_scalar(ptr inreg %out, i3 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw xchg ptr %gep, i32 %in seq_cst ret i32 %result } @@ -315,7 +315,7 @@ define void @flat_atomic_xchg_i32_noret_offset__amdgpu_no_remote_memory(ptr %out ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw xchg ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -348,7 +348,7 @@ define i32 @flat_atomic_xchg_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw xchg ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret i32 %result } @@ -413,7 +413,7 @@ define void @flat_atomic_xchg_f32_noret_offset(ptr %out, float %in) { ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %out, i32 4 + %gep = getelementptr inbounds float, ptr %out, i32 4 %tmp0 = atomicrmw xchg ptr %gep, float %in seq_cst ret void } @@ -474,7 +474,7 @@ define float @flat_atomic_xchg_f32_ret_offset(ptr %out, float %in) { ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %out, i32 4 + %gep = getelementptr inbounds float, ptr %out, i32 4 %result = atomicrmw xchg ptr %gep, float %in seq_cst ret float %result } @@ -553,7 +553,7 @@ define amdgpu_gfx void 
@flat_atomic_xchg_f32_noret_offset_scalar(ptr inreg %out, ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %out, i32 4 + %gep = getelementptr inbounds float, ptr %out, i32 4 %tmp0 = atomicrmw xchg ptr %gep, float %in seq_cst ret void } @@ -632,7 +632,7 @@ define amdgpu_gfx float @flat_atomic_xchg_f32_ret_offset_scalar(ptr inreg %out, ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %out, i32 4 + %gep = getelementptr inbounds float, ptr %out, i32 4 %result = atomicrmw xchg ptr %gep, float %in seq_cst ret float %result } @@ -665,7 +665,7 @@ define void @flat_atomic_xchg_f32_noret_offset__amdgpu_no_remote_memory(ptr %out ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %out, i64 4 + %gep = getelementptr inbounds float, ptr %out, i64 4 %tmp0 = atomicrmw xchg ptr %gep, float %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -698,7 +698,7 @@ define float @flat_atomic_xchg_f32_ret_offset__amdgpu_no_remote_memory(ptr %out, ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, ptr %out, i64 4 + %gep = getelementptr inbounds float, ptr %out, i64 4 %result = atomicrmw xchg ptr %gep, float %in seq_cst, !amdgpu.no.remote.memory !0 ret float %result } @@ -763,7 +763,7 @@ define void @flat_atomic_add_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw add ptr %gep, i32 %in seq_cst ret void } @@ -824,7 +824,7 @@ define i32 @flat_atomic_add_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw add ptr %gep, i32 %in seq_cst ret i32 %result } @@ -903,7 +903,7 @@ define amdgpu_gfx void @flat_atomic_add_i32_noret_offset_scalar(ptr inreg %out, ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw add ptr %gep, i32 %in seq_cst ret void } @@ -982,7 +982,7 @@ define amdgpu_gfx i32 @flat_atomic_add_i32_ret_offset_scalar(ptr inreg %out, i32 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw add ptr %gep, i32 %in seq_cst ret i32 %result } @@ -1015,7 +1015,7 @@ define void @flat_atomic_add_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw add ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -1048,7 +1048,7 @@ define i32 @flat_atomic_add_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i3 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; 
GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw add ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret i32 %result } @@ -1191,7 +1191,7 @@ define void @flat_atomic_sub_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw sub ptr %gep, i32 %in seq_cst ret void } @@ -1334,7 +1334,7 @@ define i32 @flat_atomic_sub_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: v_mov_b32_e32 v0, v3 ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw sub ptr %gep, i32 %in seq_cst ret i32 %result } @@ -1485,7 +1485,7 @@ define amdgpu_gfx void @flat_atomic_sub_i32_noret_offset_scalar(ptr inreg %out, ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw sub ptr %gep, i32 %in seq_cst ret void } @@ -1644,7 +1644,7 @@ define amdgpu_gfx i32 @flat_atomic_sub_i32_ret_offset_scalar(ptr inreg %out, i32 ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw sub ptr %gep, i32 %in seq_cst ret i32 %result } @@ -1677,7 +1677,7 @@ define void @flat_atomic_sub_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw sub ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -1710,7 +1710,7 @@ define i32 @flat_atomic_sub_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i3 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw sub ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret i32 %result } @@ -1853,7 +1853,7 @@ define void @flat_atomic_and_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw and ptr %gep, i32 %in seq_cst ret void } @@ -1996,7 +1996,7 @@ define i32 @flat_atomic_and_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: v_mov_b32_e32 v0, v3 ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw and ptr %gep, i32 %in seq_cst ret i32 %result } @@ -2147,7 +2147,7 @@ define amdgpu_gfx void @flat_atomic_and_i32_noret_offset_scalar(ptr inreg %out, ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = 
atomicrmw and ptr %gep, i32 %in seq_cst ret void } @@ -2306,7 +2306,7 @@ define amdgpu_gfx i32 @flat_atomic_and_i32_ret_offset_scalar(ptr inreg %out, i32 ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw and ptr %gep, i32 %in seq_cst ret i32 %result } @@ -2339,7 +2339,7 @@ define void @flat_atomic_and_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw and ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -2372,7 +2372,7 @@ define i32 @flat_atomic_and_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i3 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw and ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret i32 %result } @@ -2521,7 +2521,7 @@ define void @flat_atomic_nand_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw nand ptr %gep, i32 %in seq_cst ret void } @@ -2670,7 +2670,7 @@ define i32 @flat_atomic_nand_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: v_mov_b32_e32 v0, v3 ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw nand ptr %gep, i32 %in seq_cst ret i32 %result } @@ -2827,7 +2827,7 @@ define amdgpu_gfx void @flat_atomic_nand_i32_noret_offset_scalar(ptr inreg %out, ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw nand ptr %gep, i32 %in seq_cst ret void } @@ -2992,7 +2992,7 @@ define amdgpu_gfx i32 @flat_atomic_nand_i32_ret_offset_scalar(ptr inreg %out, i3 ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw nand ptr %gep, i32 %in seq_cst ret i32 %result } @@ -3067,7 +3067,7 @@ define void @flat_atomic_nand_i32_noret_offset__amdgpu_no_remote_memory(ptr %out ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw nand ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -3143,7 +3143,7 @@ define i32 @flat_atomic_nand_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: v_mov_b32_e32 v0, v3 ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw nand ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret i32 %result } @@ -3286,7 +3286,7 @@ 
define void @flat_atomic_or_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw or ptr %gep, i32 %in seq_cst ret void } @@ -3429,7 +3429,7 @@ define i32 @flat_atomic_or_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: v_mov_b32_e32 v0, v3 ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw or ptr %gep, i32 %in seq_cst ret i32 %result } @@ -3580,7 +3580,7 @@ define amdgpu_gfx void @flat_atomic_or_i32_noret_offset_scalar(ptr inreg %out, i ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw or ptr %gep, i32 %in seq_cst ret void } @@ -3739,7 +3739,7 @@ define amdgpu_gfx i32 @flat_atomic_or_i32_ret_offset_scalar(ptr inreg %out, i32 ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw or ptr %gep, i32 %in seq_cst ret i32 %result } @@ -3772,7 +3772,7 @@ define void @flat_atomic_or_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw or ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -3805,7 +3805,7 @@ define i32 @flat_atomic_or_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i32 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw or ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret i32 %result } @@ -3948,7 +3948,7 @@ define void @flat_atomic_xor_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw xor ptr %gep, i32 %in seq_cst ret void } @@ -4091,7 +4091,7 @@ define i32 @flat_atomic_xor_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: v_mov_b32_e32 v0, v3 ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw xor ptr %gep, i32 %in seq_cst ret i32 %result } @@ -4242,7 +4242,7 @@ define amdgpu_gfx void @flat_atomic_xor_i32_noret_offset_scalar(ptr inreg %out, ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw xor ptr %gep, i32 %in seq_cst ret void } @@ -4401,7 +4401,7 @@ define amdgpu_gfx i32 @flat_atomic_xor_i32_ret_offset_scalar(ptr inreg %out, i32 ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 
s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw xor ptr %gep, i32 %in seq_cst ret i32 %result } @@ -4434,7 +4434,7 @@ define void @flat_xor_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, i32 %i ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw xor ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -4467,7 +4467,7 @@ define i32 @flat_atomic_xor_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i3 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw xor ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret i32 %result } @@ -4610,7 +4610,7 @@ define void @flat_atomic_max_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw max ptr %gep, i32 %in seq_cst ret void } @@ -4753,7 +4753,7 @@ define i32 @flat_atomic_max_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: v_mov_b32_e32 v0, v3 ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw max ptr %gep, i32 %in seq_cst ret i32 %result } @@ -4904,7 +4904,7 @@ define amdgpu_gfx void @flat_atomic_max_i32_noret_offset_scalar(ptr inreg %out, ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw max ptr %gep, i32 %in seq_cst ret void } @@ -5063,7 +5063,7 @@ define amdgpu_gfx i32 @flat_atomic_max_i32_ret_offset_scalar(ptr inreg %out, i32 ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw max ptr %gep, i32 %in seq_cst ret i32 %result } @@ -5157,8 +5157,8 @@ define amdgpu_kernel void @atomic_max_i32_addr64_offset(ptr %out, i32 %in, i32 % ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i32 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i32 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %tmp0 = atomicrmw max ptr %gep, i32 %in seq_cst ret void } @@ -5267,8 +5267,8 @@ define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(ptr %out, ptr %out2, ; GCN3-NEXT: flat_store_dword v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i32 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i32 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %tmp0 = atomicrmw max ptr %gep, i32 %in seq_cst store i32 %tmp0, ptr %out2 ret void @@ -5359,7 +5359,7 @@ define amdgpu_kernel void @atomic_max_i32_addr64(ptr %out, i32 %in, i32 %index) ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr 
%out, i32 %index + %ptr = getelementptr inbounds i32, ptr %out, i32 %index %tmp0 = atomicrmw max ptr %ptr, i32 %in seq_cst ret void } @@ -5464,7 +5464,7 @@ define amdgpu_kernel void @atomic_max_i32_ret_addr64(ptr %out, ptr %out2, i32 %i ; GCN3-NEXT: flat_store_dword v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i32 %index + %ptr = getelementptr inbounds i32, ptr %out, i32 %index %tmp0 = atomicrmw max ptr %ptr, i32 %in seq_cst store i32 %tmp0, ptr %out2 ret void @@ -5498,7 +5498,7 @@ define void @flat_max_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, i32 %i ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw max ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -5531,7 +5531,7 @@ define i32 @flat_atomic_max_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i3 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw max ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret i32 %result } @@ -5674,7 +5674,7 @@ define void @flat_atomic_umax_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw umax ptr %gep, i32 %in seq_cst ret void } @@ -5817,7 +5817,7 @@ define i32 @flat_atomic_umax_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: v_mov_b32_e32 v0, v3 ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw umax ptr %gep, i32 %in seq_cst ret i32 %result } @@ -5968,7 +5968,7 @@ define amdgpu_gfx void @flat_atomic_umax_i32_noret_offset_scalar(ptr inreg %out, ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw umax ptr %gep, i32 %in seq_cst ret void } @@ -6127,7 +6127,7 @@ define amdgpu_gfx i32 @flat_atomic_umax_i32_ret_offset_scalar(ptr inreg %out, i3 ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw umax ptr %gep, i32 %in seq_cst ret i32 %result } @@ -6221,8 +6221,8 @@ define amdgpu_kernel void @atomic_umax_i32_addr64_offset(ptr %out, i32 %in, i32 ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i32 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i32 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %tmp0 = atomicrmw umax ptr %gep, i32 %in seq_cst ret void } @@ -6331,8 +6331,8 @@ define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(ptr %out, ptr %out2 ; GCN3-NEXT: flat_store_dword v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i32 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i32 %index + %gep = 
getelementptr inbounds i32, ptr %ptr, i32 4 %tmp0 = atomicrmw umax ptr %gep, i32 %in seq_cst store i32 %tmp0, ptr %out2 ret void @@ -6438,7 +6438,7 @@ define amdgpu_kernel void @atomic_umax_i32_ret_addr64(ptr %out, ptr %out2, i32 % ; GCN3-NEXT: flat_store_dword v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i32 %index + %ptr = getelementptr inbounds i32, ptr %out, i32 %index %tmp0 = atomicrmw umax ptr %ptr, i32 %in seq_cst store i32 %tmp0, ptr %out2 ret void @@ -6472,7 +6472,7 @@ define void @flat_umax_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, i32 % ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw umax ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -6505,7 +6505,7 @@ define i32 @flat_atomic_umax_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw umax ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret i32 %result } @@ -6648,7 +6648,7 @@ define void @flat_atomic_umin_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw umin ptr %gep, i32 %in seq_cst ret void } @@ -6791,7 +6791,7 @@ define i32 @flat_atomic_umin_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: v_mov_b32_e32 v0, v3 ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw umin ptr %gep, i32 %in seq_cst ret i32 %result } @@ -6942,7 +6942,7 @@ define amdgpu_gfx void @flat_atomic_umin_i32_noret_offset_scalar(ptr inreg %out, ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw umin ptr %gep, i32 %in seq_cst ret void } @@ -7101,7 +7101,7 @@ define amdgpu_gfx i32 @flat_atomic_umin_i32_ret_offset_scalar(ptr inreg %out, i3 ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw umin ptr %gep, i32 %in seq_cst ret i32 %result } @@ -7134,7 +7134,7 @@ define void @flat_umin_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, i32 % ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw umin ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -7167,7 +7167,7 @@ define i32 @flat_atomic_umin_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw umin ptr %gep, i32 %in seq_cst, 
!amdgpu.no.remote.memory !0 ret i32 %result } @@ -7310,7 +7310,7 @@ define void @flat_atomic_min_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw min ptr %gep, i32 %in seq_cst ret void } @@ -7453,7 +7453,7 @@ define i32 @flat_atomic_min_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: v_mov_b32_e32 v0, v3 ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw min ptr %gep, i32 %in seq_cst ret i32 %result } @@ -7604,7 +7604,7 @@ define amdgpu_gfx void @flat_atomic_min_i32_noret_offset_scalar(ptr inreg %out, ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw min ptr %gep, i32 %in seq_cst ret void } @@ -7763,7 +7763,7 @@ define amdgpu_gfx i32 @flat_atomic_min_i32_ret_offset_scalar(ptr inreg %out, i32 ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw min ptr %gep, i32 %in seq_cst ret i32 %result } @@ -7857,8 +7857,8 @@ define amdgpu_kernel void @atomic_min_i32_addr64_offset(ptr %out, i32 %in, i32 % ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i32 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i32 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %tmp0 = atomicrmw min ptr %gep, i32 %in seq_cst ret void } @@ -7967,8 +7967,8 @@ define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(ptr %out, ptr %out2, ; GCN3-NEXT: flat_store_dword v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i32 %index - %gep = getelementptr i32, ptr %ptr, i32 4 + %ptr = getelementptr inbounds i32, ptr %out, i32 %index + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %tmp0 = atomicrmw min ptr %gep, i32 %in seq_cst store i32 %tmp0, ptr %out2 ret void @@ -8151,7 +8151,7 @@ define amdgpu_kernel void @atomic_min_i32_ret_addr64(ptr %out, ptr %out2, i32 %i ; GCN3-NEXT: flat_store_dword v[0:1], v2 ; GCN3-NEXT: s_endpgm entry: - %ptr = getelementptr i32, ptr %out, i32 %index + %ptr = getelementptr inbounds i32, ptr %out, i32 %index %tmp0 = atomicrmw min ptr %ptr, i32 %in seq_cst store i32 %tmp0, ptr %out2 ret void @@ -8185,7 +8185,7 @@ define void @flat_min_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, i32 %i ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw min ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -8218,7 +8218,7 @@ define i32 @flat_atomic_min_i32_ret_offset__amdgpu_no_remote_memory(ptr %out, i3 ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw min ptr %gep, i32 %in seq_cst, 
!amdgpu.no.remote.memory !0 ret i32 %result } @@ -8373,7 +8373,7 @@ define void @flat_atomic_uinc_wrap_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw uinc_wrap ptr %gep, i32 %in seq_cst ret void } @@ -8528,7 +8528,7 @@ define i32 @flat_atomic_uinc_wrap_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN3-NEXT: v_mov_b32_e32 v0, v3 ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw uinc_wrap ptr %gep, i32 %in seq_cst ret i32 %result } @@ -8691,7 +8691,7 @@ define amdgpu_gfx void @flat_atomic_uinc_wrap_i32_noret_offset_scalar(ptr inreg ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw uinc_wrap ptr %gep, i32 %in seq_cst ret void } @@ -8862,7 +8862,7 @@ define amdgpu_gfx i32 @flat_atomic_uinc_wrap_i32_ret_offset_scalar(ptr inreg %ou ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[34:35] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw uinc_wrap ptr %gep, i32 %in seq_cst ret i32 %result } @@ -8895,7 +8895,7 @@ define void @flat_uinc_wrap_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw uinc_wrap ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -8928,7 +8928,7 @@ define i32 @flat_atomic_uinc_wrap_i32_ret_offset__amdgpu_no_remote_memory(ptr %o ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw uinc_wrap ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret i32 %result } @@ -9089,7 +9089,7 @@ define void @flat_atomic_udec_wrap_i32_noret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[6:7] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw udec_wrap ptr %gep, i32 %in seq_cst ret void } @@ -9250,7 +9250,7 @@ define i32 @flat_atomic_udec_wrap_i32_ret_offset(ptr %out, i32 %in) { ; GCN3-NEXT: s_or_b64 exec, exec, s[6:7] ; GCN3-NEXT: v_mov_b32_e32 v0, v3 ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw udec_wrap ptr %gep, i32 %in seq_cst ret i32 %result } @@ -9425,7 +9425,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i32_noret_offset_scalar(ptr inreg ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[36:37] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %tmp0 = atomicrmw udec_wrap ptr %gep, i32 %in seq_cst ret void } @@ -9608,7 +9608,7 @@ define amdgpu_gfx i32 
@flat_atomic_udec_wrap_i32_ret_offset_scalar(ptr inreg %ou ; GCN3-NEXT: ; %bb.2: ; %atomicrmw.end ; GCN3-NEXT: s_or_b64 exec, exec, s[36:37] ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %result = atomicrmw udec_wrap ptr %gep, i32 %in seq_cst ret i32 %result } @@ -9641,7 +9641,7 @@ define void @flat_udec_wrap_i32_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %tmp0 = atomicrmw udec_wrap ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret void } @@ -9674,7 +9674,7 @@ define i32 @flat_atomic_udec_wrap_i32_ret_offset__amdgpu_no_remote_memory(ptr %o ; GCN3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GCN3-NEXT: buffer_wbinvl1_vol ; GCN3-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, ptr %out, i64 4 + %gep = getelementptr inbounds i32, ptr %out, i64 4 %result = atomicrmw udec_wrap ptr %gep, i32 %in seq_cst, !amdgpu.no.remote.memory !0 ret i32 %result }
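The flat_atomics_i64.ll hunks that follow repeat the pattern of the i32 tests above: every getelementptr feeding a flat atomic access gains the inbounds flag. A minimal sketch of the resulting IR shape (the function name @sketch_flat_atomic_offset is illustrative, not part of the test suite):

define void @sketch_flat_atomic_offset(ptr %out, i64 %in) {
entry:
  ; inbounds asserts that the +32-byte displacement (4 x i64) stays
  ; within the allocation %out points into, so the address arithmetic
  ; is known not to wrap.
  %gep = getelementptr inbounds i64, ptr %out, i64 4
  %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst
  ret void
}

Presumably the no-wrap guarantee is what lets the backend fold the constant displacement into the flat instruction's immediate offset field instead of materializing a separate 64-bit add; the patch itself does not state this motivation, so treat it as an assumption.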
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll index 1f105e8dd8ba5..6dfe4594a248c 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll @@ -138,7 +138,7 @@ define amdgpu_kernel void @atomic_add_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst ret void } @@ -284,7 +284,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst store i64 %tmp0, ptr %out2 ret void @@ -438,8 +438,8 @@ define amdgpu_kernel void @atomic_add_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst ret void } @@ -590,8 +590,8 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst store i64 %tmp0, ptr %out2 ret void @@ -1015,7 +1015,7 @@ define amdgpu_kernel void @atomic_add_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile add ptr %ptr, i64 %in syncscope("agent") seq_cst ret void } @@ -1161,7 +1161,7 @@ define amdgpu_kernel void
@atomic_add_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile add ptr %ptr, i64 %in syncscope("agent") seq_cst store i64 %tmp0, ptr %out2 ret void @@ -1299,7 +1299,7 @@ define amdgpu_kernel void @atomic_and_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -1442,7 +1442,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -1593,8 +1593,8 @@ define amdgpu_kernel void @atomic_and_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -1742,8 +1742,8 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -2158,7 +2158,7 @@ define amdgpu_kernel void @atomic_and_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile and ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -2301,7 +2301,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile and ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -2442,7 +2442,7 @@ define amdgpu_kernel void @atomic_sub_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -2588,7 +2588,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64
v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -2742,8 +2742,8 @@ define amdgpu_kernel void @atomic_sub_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -2894,8 +2894,8 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -3319,7 +3319,7 @@ define amdgpu_kernel void @atomic_sub_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile sub ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -3465,7 +3465,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile sub ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -3606,7 +3606,7 @@ define amdgpu_kernel void @atomic_max_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -3754,7 +3754,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -3908,8 +3908,8 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -4062,8 +4062,8 @@ define amdgpu_kernel void 
@atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -4489,7 +4489,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile max ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -4637,7 +4637,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile max ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -4778,7 +4778,7 @@ define amdgpu_kernel void @atomic_umax_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -4926,7 +4926,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_offset(ptr %out, ptr %out2, i64 % ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -5080,8 +5080,8 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64 ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -5234,8 +5234,8 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -5661,7 +5661,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile umax ptr %ptr, i64 %in 
syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -5809,7 +5809,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 % ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile umax ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -5950,7 +5950,7 @@ define amdgpu_kernel void @atomic_min_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -6098,7 +6098,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -6252,8 +6252,8 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -6406,8 +6406,8 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -6833,7 +6833,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile min ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -6981,7 +6981,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile min ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -7122,7 +7122,7 @@ define amdgpu_kernel void @atomic_umin_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile umin ptr %gep, i64 %in 
syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -7270,7 +7270,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_offset(ptr %out, ptr %out2, i64 % ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -7424,8 +7424,8 @@ define amdgpu_kernel void @atomic_umin_i64_addr64_offset(ptr %out, i64 %in, i64 ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -7578,8 +7578,8 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(ptr %out, ptr %out2 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -8005,7 +8005,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile umin ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -8153,7 +8153,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64(ptr %out, ptr %out2, i64 % ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile umin ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -8291,7 +8291,7 @@ define amdgpu_kernel void @atomic_or_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -8434,7 +8434,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_offset(ptr %out, ptr %out2, i64 %in ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -8585,8 +8585,8 @@ define amdgpu_kernel void @atomic_or_i64_addr64_offset(ptr %out, i64 %in, i64 %i ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = 
getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -8734,8 +8734,8 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -9150,7 +9150,7 @@ define amdgpu_kernel void @atomic_or_i64_addr64(ptr %out, i64 %in, i64 %index) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile or ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -9293,7 +9293,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64(ptr %out, ptr %out2, i64 %in ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile or ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -9420,7 +9420,7 @@ define amdgpu_kernel void @atomic_xchg_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst ret void } @@ -9546,7 +9546,7 @@ define amdgpu_kernel void @atomic_xchg_f64_offset(ptr %out, double %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr double, ptr %out, i64 4 + %gep = getelementptr inbounds double, ptr %out, i64 4 %tmp0 = atomicrmw volatile xchg ptr %gep, double %in syncscope("agent") seq_cst ret void } @@ -9672,7 +9672,7 @@ define amdgpu_kernel void @atomic_xchg_pointer_offset(ptr %out, ptr %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr ptr, ptr %out, i32 4 + %gep = getelementptr inbounds ptr, ptr %out, i32 4 %val = atomicrmw volatile xchg ptr %gep, ptr %in syncscope("agent") seq_cst ret void } @@ -9812,7 +9812,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_offset(ptr %out, ptr %out2, i64 % ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst store i64 %tmp0, ptr %out2 ret void @@ -9952,8 +9952,8 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(ptr %out, i64 %in, i64 ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst ret void } @@ -10098,8 +10098,8 @@ define amdgpu_kernel void 
@atomic_xchg_i64_ret_addr64_offset(ptr %out, ptr %out2 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst store i64 %tmp0, ptr %out2 ret void @@ -10489,7 +10489,7 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile xchg ptr %ptr, i64 %in syncscope("agent") seq_cst ret void } @@ -10629,7 +10629,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(ptr %out, ptr %out2, i64 % ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile xchg ptr %ptr, i64 %in syncscope("agent") seq_cst store i64 %tmp0, ptr %out2 ret void @@ -10767,7 +10767,7 @@ define amdgpu_kernel void @atomic_xor_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -10910,7 +10910,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -11061,8 +11061,8 @@ define amdgpu_kernel void @atomic_xor_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -11210,8 +11210,8 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -11626,7 +11626,7 @@ define amdgpu_kernel void @atomic_xor_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile xor ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -11769,7 +11769,7 @@ define amdgpu_kernel void 
@atomic_xor_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile xor ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -11820,7 +11820,7 @@ define amdgpu_kernel void @atomic_load_i64_offset(ptr %in, ptr %out) { ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %in, i64 4 + %gep = getelementptr inbounds i64, ptr %in, i64 4 %val = load atomic i64, ptr %gep seq_cst, align 8 store i64 %val, ptr %out ret void @@ -11930,8 +11930,8 @@ define amdgpu_kernel void @atomic_load_i64_addr64_offset(ptr %in, ptr %out, i64 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %in, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %in, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %val = load atomic i64, ptr %gep seq_cst, align 8 store i64 %val, ptr %out ret void @@ -11991,7 +11991,7 @@ define amdgpu_kernel void @atomic_load_i64_addr64(ptr %in, ptr %out, i64 %index) ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %in, i64 %index + %ptr = getelementptr inbounds i64, ptr %in, i64 %index %val = load atomic i64, ptr %ptr seq_cst, align 8 store i64 %val, ptr %out ret void @@ -12035,7 +12035,7 @@ define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, ptr %out) { ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 store atomic i64 %in, ptr %gep seq_cst, align 8 ret void } @@ -12129,8 +12129,8 @@ define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, ptr %out, i64 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 store atomic i64 %in, ptr %gep seq_cst, align 8 ret void } @@ -12182,7 +12182,7 @@ define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, ptr %out, i64 %index ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index store atomic i64 %in, ptr %ptr seq_cst, align 8 ret void } @@ -12333,7 +12333,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst ret void } @@ -12484,7 +12484,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 9000 + %gep = getelementptr inbounds i64, ptr %out, i64 9000 %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst ret void } @@ -12633,7 +12633,7 @@ define amdgpu_kernel 
void @atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst %extract0 = extractvalue { i64, i1 } %val, 0 store i64 %extract0, ptr %out2 @@ -12791,8 +12791,8 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst ret void } @@ -12954,8 +12954,8 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst %extract0 = extractvalue { i64, i1 } %val, 0 store i64 %extract0, ptr %out2 @@ -13398,7 +13398,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst ret void } @@ -13555,7 +13555,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst %extract0 = extractvalue { i64, i1 } %val, 0 store i64 %extract0, ptr %out2 @@ -13607,7 +13607,7 @@ define amdgpu_kernel void @atomic_load_f64_offset(ptr %in, ptr %out) { ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr double, ptr %in, i64 4 + %gep = getelementptr inbounds double, ptr %in, i64 4 %val = load atomic double, ptr %gep seq_cst, align 8 store double %val, ptr %out ret void @@ -13717,8 +13717,8 @@ define amdgpu_kernel void @atomic_load_f64_addr64_offset(ptr %in, ptr %out, i64 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr double, ptr %in, i64 %index - %gep = getelementptr double, ptr %ptr, i64 4 + %ptr = getelementptr inbounds double, ptr %in, i64 %index + %gep = getelementptr inbounds double, ptr %ptr, i64 4 %val = load atomic double, ptr %gep seq_cst, align 8 store double %val, ptr %out ret void @@ -13778,7 +13778,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64(ptr %in, ptr %out, i64 %index) ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr double, ptr %in, i64 %index + %ptr = getelementptr inbounds double, ptr %in, i64 %index %val = load atomic double, ptr %ptr seq_cst, align 8 store double %val, ptr %out ret void @@ -13822,7 +13822,7 @@ define amdgpu_kernel void 
@atomic_store_f64_offset(double %in, ptr %out) { ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr double, ptr %out, i64 4 + %gep = getelementptr inbounds double, ptr %out, i64 4 store atomic double %in, ptr %gep seq_cst, align 8 ret void } @@ -13916,8 +13916,8 @@ define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, ptr %out, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr double, ptr %out, i64 %index - %gep = getelementptr double, ptr %ptr, i64 4 + %ptr = getelementptr inbounds double, ptr %out, i64 %index + %gep = getelementptr inbounds double, ptr %ptr, i64 4 store atomic double %in, ptr %gep seq_cst, align 8 ret void } @@ -13969,7 +13969,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64(double %in, ptr %out, i64 %in ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr double, ptr %out, i64 %index + %ptr = getelementptr inbounds double, ptr %out, i64 %index store atomic double %in, ptr %ptr seq_cst, align 8 ret void } @@ -14116,7 +14116,7 @@ define amdgpu_kernel void @atomic_inc_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -14269,7 +14269,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -14430,8 +14430,8 @@ define amdgpu_kernel void @atomic_inc_i64_incr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -14589,8 +14589,8 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -15035,7 +15035,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s0 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -15188,7 +15188,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64(ptr %out, ptr 
%out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -15345,7 +15345,7 @@ define amdgpu_kernel void @atomic_dec_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s4 ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -15509,7 +15509,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -15679,8 +15679,8 @@ define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s4 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -15849,8 +15849,8 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void @@ -16324,7 +16324,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: scratch_store_b64 off, v[0:1], s4 ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 ret void } @@ -16488,7 +16488,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !amdgpu.no.fine.grained.memory !0 store i64 %tmp0, ptr %out2 ret void
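The flat_atomics_i64_noprivate.ll hunks that follow exercise the same operations under !noalias.addrspace metadata, and their addr64 variants use the two-step addressing pattern in which both the variable index and the fixed offset become inbounds. A small sketch of that shape (again with an illustrative function name that is not taken from the suite):

define void @sketch_flat_atomic_addr64_offset(ptr %out, i64 %in, i64 %index) {
entry:
  ; Both steps carry inbounds: %out plus 8*%index must stay within the
  ; underlying object, and so must the further +32-byte displacement.
  %ptr = getelementptr inbounds i64, ptr %out, i64 %index
  %gep = getelementptr inbounds i64, ptr %ptr, i64 4
  %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst
  ret void
}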
diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll index 757649ca592b3..e5187a811a230 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_noprivate.ll @@ -45,7 +45,7 @@ define amdgpu_kernel void @atomic_add_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr
i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -104,7 +104,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -165,8 +165,8 @@ define amdgpu_kernel void @atomic_add_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -230,8 +230,8 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile add ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -385,7 +385,7 @@ define amdgpu_kernel void @atomic_add_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile add ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -445,7 +445,7 @@ define amdgpu_kernel void @atomic_add_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile add ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -517,7 +517,7 @@ define amdgpu_kernel void @atomic_and_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -602,7 +602,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -687,8 +687,8 @@ define amdgpu_kernel void @atomic_and_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 =
atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -778,8 +778,8 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile and ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -1015,7 +1015,7 @@ define amdgpu_kernel void @atomic_and_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile and ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -1101,7 +1101,7 @@ define amdgpu_kernel void @atomic_and_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile and ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -1175,7 +1175,7 @@ define amdgpu_kernel void @atomic_sub_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -1262,7 +1262,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -1349,8 +1349,8 @@ define amdgpu_kernel void @atomic_sub_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -1442,8 +1442,8 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile sub ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -1685,7 +1685,7 @@ define amdgpu_kernel void @atomic_sub_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile sub ptr %ptr, i64 %in 
syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -1773,7 +1773,7 @@ define amdgpu_kernel void @atomic_sub_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile sub ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -1849,7 +1849,7 @@ define amdgpu_kernel void @atomic_max_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 ret void } @@ -1938,7 +1938,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -2027,8 +2027,8 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 ret void } @@ -2122,8 +2122,8 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile max ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -2371,7 +2371,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile max ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 ret void } @@ -2461,7 +2461,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile max ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -2537,7 +2537,7 @@ define amdgpu_kernel void @atomic_umax_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 ret void } @@ -2626,7 +2626,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_offset(ptr %out, 
ptr %out2, i64 % ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -2715,8 +2715,8 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64 ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 ret void } @@ -2810,8 +2810,8 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile umax ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -3059,7 +3059,7 @@ define amdgpu_kernel void @atomic_umax_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile umax ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 ret void } @@ -3149,7 +3149,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 % ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile umax ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -3225,7 +3225,7 @@ define amdgpu_kernel void @atomic_min_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 ret void } @@ -3314,7 +3314,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -3403,8 +3403,8 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 ret void } @@ -3498,8 +3498,8 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: 
flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -3747,7 +3747,7 @@ define amdgpu_kernel void @atomic_min_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile min ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 ret void } @@ -3837,7 +3837,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile min ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -3913,7 +3913,7 @@ define amdgpu_kernel void @atomic_umin_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 ret void } @@ -4002,7 +4002,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_offset(ptr %out, ptr %out2, i64 % ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -4091,8 +4091,8 @@ define amdgpu_kernel void @atomic_umin_i64_addr64_offset(ptr %out, i64 %in, i64 ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 ret void } @@ -4186,8 +4186,8 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(ptr %out, ptr %out2 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile umin ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -4435,7 +4435,7 @@ define amdgpu_kernel void @atomic_umin_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: global_inv scope:SCOPE_SE ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile umin ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 ret void } @@ -4525,7 +4525,7 @@ define amdgpu_kernel void @atomic_umin_i64_ret_addr64(ptr %out, ptr %out2, i64 % ; GFX12-NEXT: flat_store_b64 v[2:3], 
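; [illustrative sketch — not from the patch] The `_addr64_offset` tests build
; the address in two inbounds steps; with both steps inbounds the backend may
; treat them as a single displacement:
;   byte address = %out + 8*%index + 8*4   (i64 is 8 bytes, so the constant
;   part is 32 bytes, which later surfaces as an `offset:32` operand).
define void @min_addr64_offset(ptr %out, i64 %in, i64 %index) {
  %ptr = getelementptr inbounds i64, ptr %out, i64 %index
  %gep = getelementptr inbounds i64, ptr %ptr, i64 4
  ; equivalent single GEP: getelementptr inbounds i8, ptr %out, i64 (8*%index + 32)
  %t = atomicrmw volatile min ptr %gep, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0
  ret void
}
!0 = !{i32 5, i32 6} ; assumed definition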
v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile umin ptr %ptr, i64 %in syncscope("workgroup") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -4597,7 +4597,7 @@ define amdgpu_kernel void @atomic_or_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -4682,7 +4682,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_offset(ptr %out, ptr %out2, i64 %in ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -4767,8 +4767,8 @@ define amdgpu_kernel void @atomic_or_i64_addr64_offset(ptr %out, i64 %in, i64 %i ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -4858,8 +4858,8 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile or ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -5095,7 +5095,7 @@ define amdgpu_kernel void @atomic_or_i64_addr64(ptr %out, i64 %in, i64 %index) { ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile or ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -5181,7 +5181,7 @@ define amdgpu_kernel void @atomic_or_i64_ret_addr64(ptr %out, ptr %out2, i64 %in ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile or ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -5229,7 +5229,7 @@ define amdgpu_kernel void @atomic_xchg_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -5276,7 +5276,7 @@ define amdgpu_kernel void @atomic_xchg_f64_offset(ptr %out, double %in) { ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr double, ptr %out, i64 4 + %gep = getelementptr inbounds double, ptr %out, i64 4 %tmp0 = atomicrmw volatile xchg ptr %gep, 
double %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -5323,7 +5323,7 @@ define amdgpu_kernel void @atomic_xchg_pointer_offset(ptr %out, ptr %in) { ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr ptr, ptr %out, i32 4 + %gep = getelementptr inbounds ptr, ptr %out, i32 4 %val = atomicrmw volatile xchg ptr %gep, ptr %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -5382,7 +5382,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_offset(ptr %out, ptr %out2, i64 % ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -5443,8 +5443,8 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(ptr %out, i64 %in, i64 ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -5508,8 +5508,8 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(ptr %out, ptr %out2 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile xchg ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -5663,7 +5663,7 @@ define amdgpu_kernel void @atomic_xchg_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile xchg ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -5723,7 +5723,7 @@ define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(ptr %out, ptr %out2, i64 % ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile xchg ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -5795,7 +5795,7 @@ define amdgpu_kernel void @atomic_xor_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -5880,7 +5880,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -5965,8 +5965,8 @@ define amdgpu_kernel void @atomic_xor_i64_addr64_offset(ptr %out, i64 %in, i64 
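; [illustrative sketch — not from the patch] Unlike the arithmetic RMW
; operations, `atomicrmw xchg` also accepts floating-point and pointer
; payloads, which is why this file has i64, double, and ptr variants of the
; same test:
define void @xchg_each_type(ptr %out, i64 %i, double %d, ptr %p) {
  %g = getelementptr inbounds i64, ptr %out, i64 4
  %old.i = atomicrmw volatile xchg ptr %g, i64 %i syncscope("agent") seq_cst, !noalias.addrspace !0
  %old.d = atomicrmw volatile xchg ptr %g, double %d syncscope("agent") seq_cst, !noalias.addrspace !0
  %old.p = atomicrmw volatile xchg ptr %g, ptr %p syncscope("agent") seq_cst, !noalias.addrspace !0
  ret void
}
!0 = !{i32 5, i32 6} ; assumed definition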
% ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -6056,8 +6056,8 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile xor ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -6293,7 +6293,7 @@ define amdgpu_kernel void @atomic_xor_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile xor ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -6379,7 +6379,7 @@ define amdgpu_kernel void @atomic_xor_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile xor ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -6430,7 +6430,7 @@ define amdgpu_kernel void @atomic_load_i64_offset(ptr %in, ptr %out) { ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %in, i64 4 + %gep = getelementptr inbounds i64, ptr %in, i64 4 %val = load atomic i64, ptr %gep seq_cst, align 8 store i64 %val, ptr %out ret void @@ -6540,8 +6540,8 @@ define amdgpu_kernel void @atomic_load_i64_addr64_offset(ptr %in, ptr %out, i64 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %in, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %in, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %val = load atomic i64, ptr %gep seq_cst, align 8 store i64 %val, ptr %out ret void @@ -6601,7 +6601,7 @@ define amdgpu_kernel void @atomic_load_i64_addr64(ptr %in, ptr %out, i64 %index) ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %in, i64 %index + %ptr = getelementptr inbounds i64, ptr %in, i64 %index %val = load atomic i64, ptr %ptr seq_cst, align 8 store i64 %val, ptr %out ret void @@ -6645,7 +6645,7 @@ define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, ptr %out) { ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 store atomic i64 %in, ptr %gep seq_cst, align 8 ret void } @@ -6739,8 +6739,8 @@ define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, ptr %out, i64 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr 
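; [illustrative sketch — not from the patch] Plain atomic loads and stores
; spell the ordering after the pointer and require an explicit alignment; an
; in-range constant displacement can then be encoded directly in the access
; (visible above as `flat_store_b64 v[2:3], v[0:1] offset:32`). Sketch:
define void @load_store_atomic(ptr %in, ptr %out) {
  %gep = getelementptr inbounds i64, ptr %in, i64 4
  %val = load atomic i64, ptr %gep seq_cst, align 8
  store atomic i64 %val, ptr %out seq_cst, align 8
  ret void
}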
inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 store atomic i64 %in, ptr %gep seq_cst, align 8 ret void } @@ -6792,7 +6792,7 @@ define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, ptr %out, i64 %index ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index store atomic i64 %in, ptr %ptr seq_cst, align 8 ret void } @@ -6848,7 +6848,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_offset(ptr %out, i64 %in, i64 %old ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 ret void } @@ -6904,7 +6904,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(ptr %out, i64 %in, i64 %ol ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 9000 + %gep = getelementptr inbounds i64, ptr %out, i64 9000 %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 ret void } @@ -6964,7 +6964,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(ptr %out, ptr %out2, i6 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 %extract0 = extractvalue { i64, i1 } %val, 0 store i64 %extract0, ptr %out2 @@ -7026,8 +7026,8 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(ptr %out, i64 %in, i ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 ret void } @@ -7099,8 +7099,8 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(ptr %out, ptr %o ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 %extract0 = extractvalue { i64, i1 } %val, 0 store i64 %extract0, ptr %out2 @@ -7266,7 +7266,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(ptr %out, i64 %in, i64 %ind ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %val = cmpxchg volatile ptr %ptr, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 ret void } @@ -7334,7 +7334,7 @@ define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(ptr %out, ptr %out2, i6 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %val = cmpxchg volatile ptr %ptr, i64 %old, 
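; [illustrative sketch — not from the patch] `cmpxchg` yields a { iN, i1 }
; pair: the value found in memory and a success flag; the `_ret_` tests above
; keep only field 0. The `_soffset` test indexes by 9000 (72000 bytes), too
; large for an immediate field, so the name suggests it exercises the
; scalar-offset path instead. The result pair looks like this:
define i64 @cmpxchg_old_value(ptr %out, i64 %old, i64 %in) {
  %gep = getelementptr inbounds i64, ptr %out, i64 4
  %val = cmpxchg volatile ptr %gep, i64 %old, i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0
  %found = extractvalue { i64, i1 } %val, 0 ; value that was in memory
  %ok = extractvalue { i64, i1 } %val, 1    ; true iff the swap happened
  ret i64 %found
}
!0 = !{i32 5, i32 6} ; assumed definition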
i64 %in syncscope("agent") seq_cst seq_cst, !noalias.addrspace !0 %extract0 = extractvalue { i64, i1 } %val, 0 store i64 %extract0, ptr %out2 @@ -7386,7 +7386,7 @@ define amdgpu_kernel void @atomic_load_f64_offset(ptr %in, ptr %out) { ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr double, ptr %in, i64 4 + %gep = getelementptr inbounds double, ptr %in, i64 4 %val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0 store double %val, ptr %out ret void @@ -7496,8 +7496,8 @@ define amdgpu_kernel void @atomic_load_f64_addr64_offset(ptr %in, ptr %out, i64 ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr double, ptr %in, i64 %index - %gep = getelementptr double, ptr %ptr, i64 4 + %ptr = getelementptr inbounds double, ptr %in, i64 %index + %gep = getelementptr inbounds double, ptr %ptr, i64 4 %val = load atomic double, ptr %gep seq_cst, align 8, !noalias.addrspace !0 store double %val, ptr %out ret void @@ -7557,7 +7557,7 @@ define amdgpu_kernel void @atomic_load_f64_addr64(ptr %in, ptr %out, i64 %index) ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr double, ptr %in, i64 %index + %ptr = getelementptr inbounds double, ptr %in, i64 %index %val = load atomic double, ptr %ptr seq_cst, align 8, !noalias.addrspace !0 store double %val, ptr %out ret void @@ -7601,7 +7601,7 @@ define amdgpu_kernel void @atomic_store_f64_offset(double %in, ptr %out) { ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr double, ptr %out, i64 4 + %gep = getelementptr inbounds double, ptr %out, i64 4 store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0 ret void } @@ -7695,8 +7695,8 @@ define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, ptr %out, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] offset:32 scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr double, ptr %out, i64 %index - %gep = getelementptr double, ptr %ptr, i64 4 + %ptr = getelementptr inbounds double, ptr %out, i64 %index + %gep = getelementptr inbounds double, ptr %ptr, i64 4 store atomic double %in, ptr %gep seq_cst, align 8, !noalias.addrspace !0 ret void } @@ -7748,7 +7748,7 @@ define amdgpu_kernel void @atomic_store_f64_addr64(double %in, ptr %out, i64 %in ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] scope:SCOPE_SYS ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr double, ptr %out, i64 %index + %ptr = getelementptr inbounds double, ptr %out, i64 %index store atomic double %in, ptr %ptr seq_cst, align 8, !noalias.addrspace !0 ret void } @@ -7825,7 +7825,7 @@ define amdgpu_kernel void @atomic_inc_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -7916,7 +7916,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -8007,8 +8007,8 @@ define amdgpu_kernel void 
@atomic_inc_i64_incr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -8104,8 +8104,8 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile uinc_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -8355,7 +8355,7 @@ define amdgpu_kernel void @atomic_inc_i64_incr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -8447,7 +8447,7 @@ define amdgpu_kernel void @atomic_inc_i64_ret_incr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile uinc_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -8533,7 +8533,7 @@ define amdgpu_kernel void @atomic_dec_i64_offset(ptr %out, i64 %in) { ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -8632,7 +8632,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_offset(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -8731,8 +8731,8 @@ define amdgpu_kernel void @atomic_dec_i64_decr64_offset(ptr %out, i64 %in, i64 % ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -8836,8 +8836,8 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64_offset(ptr %out, ptr %out2, ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw volatile udec_wrap ptr %gep, i64 %in syncscope("agent") seq_cst, 
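; [illustrative sketch — not from the patch] Semantics of the wrapping
; operations under test, written out as the equivalent non-atomic update per
; the LangRef:
;   uinc_wrap: new = (old u>= %in) ? 0 : old + 1
;   udec_wrap: new = (old == 0 || old u> %in) ? %in : old - 1
define i64 @uinc_wrap_model(i64 %old, i64 %in) {
  %ge = icmp uge i64 %old, %in
  %inc = add i64 %old, 1
  %new = select i1 %ge, i64 0, i64 %inc
  ret i64 %new
}
define i64 @udec_wrap_model(i64 %old, i64 %in) {
  %z = icmp eq i64 %old, 0
  %gt = icmp ugt i64 %old, %in
  %wrap = or i1 %z, %gt
  %dec = sub i64 %old, 1
  %new = select i1 %wrap, i64 %in, i64 %dec
  ret i64 %new
}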
!noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void @@ -9111,7 +9111,7 @@ define amdgpu_kernel void @atomic_dec_i64_decr64(ptr %out, i64 %in, i64 %index) ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 ret void } @@ -9211,7 +9211,7 @@ define amdgpu_kernel void @atomic_dec_i64_ret_decr64(ptr %out, ptr %out2, i64 %i ; GFX12-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX12-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw volatile udec_wrap ptr %ptr, i64 %in syncscope("agent") seq_cst, !noalias.addrspace !0 store i64 %tmp0, ptr %out2 ret void diff --git a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll index 4dea4495b36fb..f655d4761fa31 100644 --- a/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll +++ b/llvm/test/CodeGen/AMDGPU/flat_atomics_i64_system_noprivate.ll @@ -63,7 +63,7 @@ define void @flat_atomic_xchg_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -124,7 +124,7 @@ define i64 @flat_atomic_xchg_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -209,7 +209,7 @@ define amdgpu_gfx void @flat_atomic_xchg_i64_noret_offset_scalar(ptr inreg %out, ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -294,7 +294,7 @@ define amdgpu_gfx i64 @flat_atomic_xchg_i64_ret_offset_scalar(ptr inreg %out, i6 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -327,7 +327,7 @@ define void @flat_atomic_xchg_i64_noret_offset__amdgpu_no_remote_memory(ptr %out ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw xchg ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -360,7 +360,7 @@ define i64 @flat_atomic_xchg_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw xchg ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, 
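; [illustrative sketch — not from the patch] Two metadata kinds recur in the
; second test file. `!noalias.addrspace` asserts the flat pointer is not in
; the given address-space range (here presumably private/scratch, hence the
; file's "noprivate" suffix), and `!amdgpu.no.remote.memory` asserts the
; access does not touch remote (peer-device/host) memory; both let flat
; atomics lower without the conservative expansion. Node definitions below
; are assumed, matching the usual test pattern:
define void @metadata_example(ptr %out, i64 %in) {
  %gep = getelementptr inbounds i64, ptr %out, i64 4
  %t = atomicrmw xchg ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1
  ret void
}
!0 = !{}             ; presence-only marker
!1 = !{i32 5, i32 6} ; "not in addrspace [5,6)", i.e. not private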
!noalias.addrspace !1 ret i64 %result } @@ -425,7 +425,7 @@ define void @flat_atomic_xchg_f64_noret_offset(ptr %out, double %in) { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %out, i32 4 + %gep = getelementptr inbounds double, ptr %out, i32 4 %tmp0 = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1 ret void } @@ -486,7 +486,7 @@ define double @flat_atomic_xchg_f64_ret_offset(ptr %out, double %in) { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %out, i32 4 + %gep = getelementptr inbounds double, ptr %out, i32 4 %result = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1 ret double %result } @@ -571,7 +571,7 @@ define amdgpu_gfx void @flat_atomic_xchg_f64_noret_offset_scalar(ptr inreg %out, ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %out, i32 4 + %gep = getelementptr inbounds double, ptr %out, i32 4 %tmp0 = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1 ret void } @@ -656,7 +656,7 @@ define amdgpu_gfx double @flat_atomic_xchg_f64_ret_offset_scalar(ptr inreg %out, ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %out, i32 4 + %gep = getelementptr inbounds double, ptr %out, i32 4 %result = atomicrmw xchg ptr %gep, double %in seq_cst, !noalias.addrspace !1 ret double %result } @@ -689,7 +689,7 @@ define void @flat_atomic_xchg_f64_noret_offset__amdgpu_no_remote_memory(ptr %out ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %out, i64 4 + %gep = getelementptr inbounds double, ptr %out, i64 4 %tmp0 = atomicrmw xchg ptr %gep, double %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -722,7 +722,7 @@ define double @flat_atomic_xchg_f64_ret_offset__amdgpu_no_remote_memory(ptr %out ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr double, ptr %out, i64 4 + %gep = getelementptr inbounds double, ptr %out, i64 4 %result = atomicrmw xchg ptr %gep, double %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret double %result } @@ -787,7 +787,7 @@ define void @flat_atomic_add_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -848,7 +848,7 @@ define i64 @flat_atomic_add_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -933,7 +933,7 @@ define amdgpu_gfx void @flat_atomic_add_i64_noret_offset_scalar(ptr inreg %out, ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, 
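; [illustrative sketch — not from the patch] The `_scalar` variants take the
; pointer `inreg`, i.e. as a uniform value in SGPRs under the amdgpu_gfx
; calling convention, so they exercise the scalar-address path of the same
; lowering. Sketch:
define amdgpu_gfx void @scalar_variant(ptr inreg %out, i64 inreg %in) {
  %gep = getelementptr inbounds i64, ptr %out, i64 4
  %t = atomicrmw xchg ptr %gep, i64 %in seq_cst, !noalias.addrspace !1
  ret void
}
!1 = !{i32 5, i32 6} ; assumed definition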
ptr %out, i64 4 %tmp0 = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -1018,7 +1018,7 @@ define amdgpu_gfx i64 @flat_atomic_add_i64_ret_offset_scalar(ptr inreg %out, i64 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw add ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -1051,7 +1051,7 @@ define void @flat_atomic_add_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw add ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -1084,7 +1084,7 @@ define i64 @flat_atomic_add_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw add ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret i64 %result } @@ -1251,7 +1251,7 @@ define void @flat_atomic_sub_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -1422,7 +1422,7 @@ define i64 @flat_atomic_sub_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -1619,7 +1619,7 @@ define amdgpu_gfx void @flat_atomic_sub_i64_noret_offset_scalar(ptr inreg %out, ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -1816,7 +1816,7 @@ define amdgpu_gfx i64 @flat_atomic_sub_i64_ret_offset_scalar(ptr inreg %out, i64 ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw sub ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -1849,7 +1849,7 @@ define void @flat_atomic_sub_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw sub ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -1882,7 +1882,7 @@ define i64 @flat_atomic_sub_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = 
getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw sub ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret i64 %result } @@ -2049,7 +2049,7 @@ define void @flat_atomic_and_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -2220,7 +2220,7 @@ define i64 @flat_atomic_and_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -2411,7 +2411,7 @@ define amdgpu_gfx void @flat_atomic_and_i64_noret_offset_scalar(ptr inreg %out, ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -2602,7 +2602,7 @@ define amdgpu_gfx i64 @flat_atomic_and_i64_ret_offset_scalar(ptr inreg %out, i64 ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw and ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -2635,7 +2635,7 @@ define void @flat_atomic_and_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw and ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -2668,7 +2668,7 @@ define i64 @flat_atomic_and_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw and ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret i64 %result } @@ -2847,7 +2847,7 @@ define void @flat_atomic_nand_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -3030,7 +3030,7 @@ define i64 @flat_atomic_nand_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -3233,7 +3233,7 @@ define amdgpu_gfx void @flat_atomic_nand_i64_noret_offset_scalar(ptr inreg %out, ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, 
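; [illustrative sketch — not from the patch] There is no native nand atomic,
; so `atomicrmw nand` is expanded to a compare-and-swap loop (the
; `%atomicrmw.end` labels in the GFX9 output above are the tail of that
; loop). Hand-written equivalent of the expansion:
define i64 @nand_as_cas_loop(ptr %p, i64 %in) {
entry:
  %first = load i64, ptr %p, align 8
  br label %loop
loop:
  %old = phi i64 [ %first, %entry ], [ %seen, %loop ]
  %and = and i64 %old, %in
  %nand = xor i64 %and, -1                       ; ~(old & in)
  %pair = cmpxchg ptr %p, i64 %old, i64 %nand seq_cst seq_cst
  %seen = extractvalue { i64, i1 } %pair, 0
  %ok = extractvalue { i64, i1 } %pair, 1
  br i1 %ok, label %end, label %loop
end:
  ret i64 %old                                   ; value before the successful swap
}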
exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -3436,7 +3436,7 @@ define amdgpu_gfx i64 @flat_atomic_nand_i64_ret_offset_scalar(ptr inreg %out, i6 ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw nand ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -3526,7 +3526,7 @@ define void @flat_atomic_nand_i64_noret_offset__amdgpu_no_remote_memory(ptr %out ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw nand ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -3618,7 +3618,7 @@ define i64 @flat_atomic_nand_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw nand ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret i64 %result } @@ -3785,7 +3785,7 @@ define void @flat_atomic_or_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -3956,7 +3956,7 @@ define i64 @flat_atomic_or_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -4147,7 +4147,7 @@ define amdgpu_gfx void @flat_atomic_or_i64_noret_offset_scalar(ptr inreg %out, i ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -4338,7 +4338,7 @@ define amdgpu_gfx i64 @flat_atomic_or_i64_ret_offset_scalar(ptr inreg %out, i64 ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw or ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -4371,7 +4371,7 @@ define void @flat_atomic_or_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw or ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -4404,7 +4404,7 @@ define i64 @flat_atomic_or_i64_ret_offset__amdgpu_no_remote_memory(ptr 
%out, i64 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw or ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret i64 %result } @@ -4571,7 +4571,7 @@ define void @flat_atomic_xor_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -4742,7 +4742,7 @@ define i64 @flat_atomic_xor_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -4933,7 +4933,7 @@ define amdgpu_gfx void @flat_atomic_xor_i64_noret_offset_scalar(ptr inreg %out, ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -5124,7 +5124,7 @@ define amdgpu_gfx i64 @flat_atomic_xor_i64_ret_offset_scalar(ptr inreg %out, i64 ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw xor ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -5157,7 +5157,7 @@ define void @flat_atomic_xor_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw xor ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -5190,7 +5190,7 @@ define i64 @flat_atomic_xor_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw xor ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret i64 %result } @@ -5363,7 +5363,7 @@ define void @flat_atomic_max_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -5540,7 +5540,7 @@ define i64 @flat_atomic_max_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -5749,7 +5749,7 @@ define amdgpu_gfx void 
@flat_atomic_max_i64_noret_offset_scalar(ptr inreg %out, ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -5958,7 +5958,7 @@ define amdgpu_gfx i64 @flat_atomic_max_i64_ret_offset_scalar(ptr inreg %out, i64 ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -6064,8 +6064,8 @@ define amdgpu_kernel void @atomic_max_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -6180,8 +6180,8 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX9-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 store i64 %tmp0, ptr %out2 ret void @@ -6284,7 +6284,7 @@ define amdgpu_kernel void @atomic_max_i64_addr64(ptr %out, i64 %in, i64 %index) ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -6395,7 +6395,7 @@ define amdgpu_kernel void @atomic_max_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX9-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw max ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 store i64 %tmp0, ptr %out2 ret void @@ -6429,7 +6429,7 @@ define void @flat_atomic_max_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw max ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -6462,7 +6462,7 @@ define i64 @flat_atomic_max_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw max ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret i64 %result } @@ -6635,7 +6635,7 @@ define void @flat_atomic_umax_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 
+ %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -6812,7 +6812,7 @@ define i64 @flat_atomic_umax_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -7021,7 +7021,7 @@ define amdgpu_gfx void @flat_atomic_umax_i64_noret_offset_scalar(ptr inreg %out, ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -7230,7 +7230,7 @@ define amdgpu_gfx i64 @flat_atomic_umax_i64_ret_offset_scalar(ptr inreg %out, i6 ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -7336,8 +7336,8 @@ define amdgpu_kernel void @atomic_umax_i64_addr64_offset(ptr %out, i64 %in, i64 ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -7452,8 +7452,8 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(ptr %out, ptr %out2 ; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX9-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 store i64 %tmp0, ptr %out2 ret void @@ -7565,7 +7565,7 @@ define amdgpu_kernel void @atomic_umax_i64_ret_addr64(ptr %out, ptr %out2, i64 % ; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX9-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw umax ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 store i64 %tmp0, ptr %out2 ret void @@ -7599,7 +7599,7 @@ define void @flat_atomic_umax_i64_noret_offset__amdgpu_no_remote_memory(ptr %out ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw umax ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -7632,7 +7632,7 @@ define i64 @flat_atomic_umax_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw umax ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret i64 %result } @@ -7805,7 +7805,7 @@ define void 
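; [illustrative sketch — not from the patch] `max`/`min` compare signed,
; `umax`/`umin` unsigned; per the LangRef all four reduce to an icmp+select
; on the old value:
define i64 @minmax_models(i64 %old, i64 %in) {
  %sgt = icmp sgt i64 %old, %in
  %max = select i1 %sgt, i64 %old, i64 %in    ; atomicrmw max
  %ugt = icmp ugt i64 %old, %in
  %umax = select i1 %ugt, i64 %old, i64 %in   ; atomicrmw umax
  %slt = icmp slt i64 %old, %in
  %min = select i1 %slt, i64 %old, i64 %in    ; atomicrmw min
  %ult = icmp ult i64 %old, %in
  %umin = select i1 %ult, i64 %old, i64 %in   ; atomicrmw umin
  ret i64 %max
}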
@flat_atomic_umin_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -7982,7 +7982,7 @@ define i64 @flat_atomic_umin_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -8191,7 +8191,7 @@ define amdgpu_gfx void @flat_atomic_umin_i64_noret_offset_scalar(ptr inreg %out, ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -8400,7 +8400,7 @@ define amdgpu_gfx i64 @flat_atomic_umin_i64_ret_offset_scalar(ptr inreg %out, i6 ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw umin ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -8433,7 +8433,7 @@ define void @flat_atomic_umin_i64_noret_offset__amdgpu_no_remote_memory(ptr %out ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw umin ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -8466,7 +8466,7 @@ define i64 @flat_atomic_umin_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw umin ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret i64 %result } @@ -8639,7 +8639,7 @@ define void @flat_atomic_min_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -8816,7 +8816,7 @@ define i64 @flat_atomic_min_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -9025,7 +9025,7 @@ define amdgpu_gfx void @flat_atomic_min_i64_noret_offset_scalar(ptr inreg %out, ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -9234,7 
+9234,7 @@ define amdgpu_gfx i64 @flat_atomic_min_i64_ret_offset_scalar(ptr inreg %out, i64 ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -9340,8 +9340,8 @@ define amdgpu_kernel void @atomic_min_i64_addr64_offset(ptr %out, i64 %in, i64 % ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -9456,8 +9456,8 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(ptr %out, ptr %out2, ; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX9-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index - %gep = getelementptr i64, ptr %ptr, i64 4 + %ptr = getelementptr inbounds i64, ptr %out, i64 %index + %gep = getelementptr inbounds i64, ptr %ptr, i64 4 %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 store i64 %tmp0, ptr %out2 ret void @@ -9664,7 +9664,7 @@ define amdgpu_kernel void @atomic_min_i64_ret_addr64(ptr %out, ptr %out2, i64 %i ; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[2:3] ; GFX9-NEXT: s_endpgm entry: - %ptr = getelementptr i64, ptr %out, i64 %index + %ptr = getelementptr inbounds i64, ptr %out, i64 %index %tmp0 = atomicrmw min ptr %ptr, i64 %in seq_cst, !noalias.addrspace !1 store i64 %tmp0, ptr %out2 ret void @@ -9698,7 +9698,7 @@ define void @flat_atomic_min_i64_noret_offset__amdgpu_no_remote_memory(ptr %out, ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw min ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -9731,7 +9731,7 @@ define i64 @flat_atomic_min_i64_ret_offset__amdgpu_no_remote_memory(ptr %out, i6 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw min ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret i64 %result } @@ -9916,7 +9916,7 @@ define void @flat_atomic_uinc_wrap_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -10105,7 +10105,7 @@ define i64 @flat_atomic_uinc_wrap_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -10314,7 +10314,7 @@ define amdgpu_gfx void @flat_atomic_uinc_wrap_i64_noret_offset_scalar(ptr inreg ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT:
s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -10523,7 +10523,7 @@ define amdgpu_gfx i64 @flat_atomic_uinc_wrap_i64_ret_offset_scalar(ptr inreg %ou ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[34:35] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -10556,7 +10556,7 @@ define void @flat_atomic_uinc_wrap_i64_noret_offset__amdgpu_no_remote_memory(ptr ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret void } @@ -10589,7 +10589,7 @@ define i64 @flat_atomic_uinc_wrap_i64_ret_offset__amdgpu_no_remote_memory(ptr %o ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw uinc_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret i64 %result } @@ -10786,7 +10786,7 @@ define void @flat_atomic_udec_wrap_i64_noret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[8:9] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -10987,7 +10987,7 @@ define i64 @flat_atomic_udec_wrap_i64_ret_offset(ptr %out, i64 %in) { ; GFX9-NEXT: v_mov_b32_e32 v0, v4 ; GFX9-NEXT: v_mov_b32_e32 v1, v5 ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -11220,7 +11220,7 @@ define amdgpu_gfx void @flat_atomic_udec_wrap_i64_noret_offset_scalar(ptr inreg ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[38:39] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret void } @@ -11453,7 +11453,7 @@ define amdgpu_gfx i64 @flat_atomic_udec_wrap_i64_ret_offset_scalar(ptr inreg %ou ; GFX9-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX9-NEXT: s_or_b64 exec, exec, s[38:39] ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !noalias.addrspace !1 ret i64 %result } @@ -11486,7 +11486,7 @@ define void @flat_atomic_udec_wrap_i64_noret_offset__amdgpu_no_remote_memory(ptr ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %tmp0 = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 
ret void } @@ -11519,7 +11519,7 @@ define i64 @flat_atomic_udec_wrap_i64_ret_offset__amdgpu_no_remote_memory(ptr %o ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i64, ptr %out, i64 4 + %gep = getelementptr inbounds i64, ptr %out, i64 4 %result = atomicrmw udec_wrap ptr %gep, i64 %in seq_cst, !amdgpu.no.remote.memory !0, !noalias.addrspace !1 ret i64 %result } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll index 37c57ef57570e..cfa03402ef048 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll @@ -579,7 +579,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_store_dword v[0:1], v2 ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, ptr %ptr, i32 4 + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) store i32 %result, ptr %out ret void @@ -665,7 +665,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) nounwind { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, ptr %ptr, i32 4 + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } @@ -729,9 +729,9 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr % ; GFX9-NEXT: flat_store_dword v[0:1], v3 ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, ptr %ptr, i32 %id - %out.gep = getelementptr i32, ptr %out, i32 %id - %gep = getelementptr i32, ptr %gep.tid, i32 5 + %gep.tid = getelementptr inbounds i32, ptr %ptr, i32 %id + %out.gep = getelementptr inbounds i32, ptr %out, i32 %id + %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) store i32 %result, ptr %out.gep ret void @@ -784,8 +784,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #0 ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, ptr %ptr, i32 %id - %gep = getelementptr i32, ptr %gep.tid, i32 5 + %gep.tid = getelementptr inbounds i32, ptr %ptr, i32 %id + %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5 %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } @@ -895,7 +895,7 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #0 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, ptr %ptr, i32 4 + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) store i64 %result, ptr %out ret void @@ -987,7 +987,7 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) nounwind { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, ptr %ptr, i32 4 + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } @@ 
-1054,9 +1054,9 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr % ; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, ptr %ptr, i32 %id - %out.gep = getelementptr i64, ptr %out, i32 %id - %gep = getelementptr i64, ptr %gep.tid, i32 5 + %gep.tid = getelementptr inbounds i64, ptr %ptr, i32 %id + %out.gep = getelementptr inbounds i64, ptr %out, i32 %id + %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) store i64 %result, ptr %out.gep ret void @@ -1112,8 +1112,8 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #0 ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, ptr %ptr, i32 %id - %gep = getelementptr i64, ptr %gep.tid, i32 5 + %gep.tid = getelementptr inbounds i64, ptr %ptr, i32 %id + %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5 %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll index 2f4ecb8b0de92..cccd2449c3f01 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll @@ -1145,7 +1145,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #0 ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_store_dword v[0:1], v2 ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, ptr %ptr, i32 4 + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) store i32 %result, ptr %out ret void @@ -1231,7 +1231,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) nounwind { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, ptr %ptr, i32 4 + %gep = getelementptr inbounds i32, ptr %ptr, i32 4 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } @@ -1295,9 +1295,9 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr % ; GFX9-NEXT: flat_store_dword v[0:1], v3 ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, ptr %ptr, i32 %id - %out.gep = getelementptr i32, ptr %out, i32 %id - %gep = getelementptr i32, ptr %gep.tid, i32 5 + %gep.tid = getelementptr inbounds i32, ptr %ptr, i32 %id + %out.gep = getelementptr inbounds i32, ptr %out, i32 %id + %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) store i32 %result, ptr %out.gep ret void @@ -1350,8 +1350,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #0 ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, ptr %ptr, i32 %id - %gep = getelementptr i32, ptr %gep.tid, i32 5 + %gep.tid = getelementptr inbounds i32, ptr %ptr, i32 %id + %gep = getelementptr inbounds i32, ptr %gep.tid, i32 5 %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } @@ -1533,7 +1533,7 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr 
%out, ptr %ptr) #0 ; GFX9-NEXT: v_mov_b32_e32 v3, s1 ; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, ptr %ptr, i32 4 + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) store i64 %result, ptr %out ret void @@ -1625,7 +1625,7 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) nounwind { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, ptr %ptr, i32 4 + %gep = getelementptr inbounds i64, ptr %ptr, i32 4 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } @@ -1692,9 +1692,9 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr % ; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, ptr %ptr, i32 %id - %out.gep = getelementptr i64, ptr %out, i32 %id - %gep = getelementptr i64, ptr %gep.tid, i32 5 + %gep.tid = getelementptr inbounds i64, ptr %ptr, i32 %id + %out.gep = getelementptr inbounds i64, ptr %out, i32 %id + %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) store i64 %result, ptr %out.gep ret void @@ -1750,8 +1750,8 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #0 ; GFX9-NEXT: buffer_wbinvl1_vol ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, ptr %ptr, i32 %id - %gep = getelementptr i64, ptr %gep.tid, i32 5 + %gep.tid = getelementptr inbounds i64, ptr %ptr, i32 %id + %gep = getelementptr inbounds i64, ptr %gep.tid, i32 5 %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll index 9e2906cf85432..bf516f8b91c91 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll @@ -3486,7 +3486,7 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic ret void } @@ -3772,7 +3772,7 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic ret void } @@ -4052,7 +4052,7 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release monotonic ret void } @@ -4365,7 +4365,7 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr 
%gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic ret void } @@ -4678,7 +4678,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic ret void } @@ -4964,7 +4964,7 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire ret void } @@ -5250,7 +5250,7 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire acquire ret void } @@ -5563,7 +5563,7 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release acquire ret void } @@ -5876,7 +5876,7 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire ret void } @@ -6189,7 +6189,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire ret void } @@ -6502,7 +6502,7 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst ret void } @@ -6815,7 +6815,7 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst ret void } @@ -7128,7 +7128,7 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release seq_cst ret void } @@ -7441,7 +7441,7 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst ret void } @@ -7754,7 +7754,7 @@ define 
amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst ret void } @@ -8053,7 +8053,7 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -8370,7 +8370,7 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -8698,7 +8698,7 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -9042,7 +9042,7 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -9386,7 +9386,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -9703,7 +9703,7 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -10020,7 +10020,7 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -10364,7 +10364,7 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") 
release acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -10708,7 +10708,7 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -11052,7 +11052,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -11396,7 +11396,7 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -11740,7 +11740,7 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -12084,7 +12084,7 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -12428,7 +12428,7 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -12772,7 +12772,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -16295,7 +16295,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic ret void } @@ -16577,7 +16577,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr 
%out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic ret void } @@ -16857,7 +16857,7 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic ret void } @@ -17166,7 +17166,7 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic ret void } @@ -17475,7 +17475,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic ret void } @@ -17757,7 +17757,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire ret void } @@ -18039,7 +18039,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire ret void } @@ -18348,7 +18348,7 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire ret void } @@ -18657,7 +18657,7 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire ret void } @@ -18966,7 +18966,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire ret void } @@ -19275,7 +19275,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst ret void } @@ -19584,7 +19584,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, 
i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst ret void } @@ -19893,7 +19893,7 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst ret void } @@ -20202,7 +20202,7 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst ret void } @@ -20511,7 +20511,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst ret void } @@ -20810,7 +20810,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -21138,7 +21138,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -21466,7 +21466,7 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -21821,7 +21821,7 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -22176,7 +22176,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -22504,7 +22504,7 @@ define amdgpu_kernel void 
@flat_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -22832,7 +22832,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -23187,7 +23187,7 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -23542,7 +23542,7 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -23897,7 +23897,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -24252,7 +24252,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -24607,7 +24607,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -24962,7 +24962,7 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -25317,7 +25317,7 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = 
getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -25672,7 +25672,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("agent-one-as") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll index 27283beb4b877..b2b71c246c97b 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll @@ -3479,7 +3479,7 @@ define amdgpu_kernel void @flat_cluster_monotonic_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") monotonic monotonic ret void } @@ -3765,7 +3765,7 @@ define amdgpu_kernel void @flat_cluster_acquire_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acquire monotonic ret void } @@ -4044,7 +4044,7 @@ define amdgpu_kernel void @flat_cluster_release_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") release monotonic ret void } @@ -4356,7 +4356,7 @@ define amdgpu_kernel void @flat_cluster_acq_rel_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acq_rel monotonic ret void } @@ -4668,7 +4668,7 @@ define amdgpu_kernel void @flat_cluster_seq_cst_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") seq_cst monotonic ret void } @@ -4954,7 +4954,7 @@ define amdgpu_kernel void @flat_cluster_monotonic_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") monotonic acquire ret void } @@ -5240,7 +5240,7 @@ define amdgpu_kernel void @flat_cluster_acquire_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acquire acquire ret void } @@ -5552,7 +5552,7 @@ define amdgpu_kernel void @flat_cluster_release_acquire_cmpxchg( ; GFX1250-NEXT: 
s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") release acquire ret void } @@ -5864,7 +5864,7 @@ define amdgpu_kernel void @flat_cluster_acq_rel_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acq_rel acquire ret void } @@ -6176,7 +6176,7 @@ define amdgpu_kernel void @flat_cluster_seq_cst_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") seq_cst acquire ret void } @@ -6488,7 +6488,7 @@ define amdgpu_kernel void @flat_cluster_monotonic_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") monotonic seq_cst ret void } @@ -6800,7 +6800,7 @@ define amdgpu_kernel void @flat_cluster_acquire_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acquire seq_cst ret void } @@ -7112,7 +7112,7 @@ define amdgpu_kernel void @flat_cluster_release_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") release seq_cst ret void } @@ -7424,7 +7424,7 @@ define amdgpu_kernel void @flat_cluster_acq_rel_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acq_rel seq_cst ret void } @@ -7736,7 +7736,7 @@ define amdgpu_kernel void @flat_cluster_seq_cst_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") seq_cst seq_cst ret void } @@ -8035,7 +8035,7 @@ define amdgpu_kernel void @flat_cluster_monotonic_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") monotonic monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -8352,7 +8352,7 @@ define amdgpu_kernel void @flat_cluster_acquire_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acquire monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -8679,7 +8679,7 @@ define 
amdgpu_kernel void @flat_cluster_release_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") release monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -9022,7 +9022,7 @@ define amdgpu_kernel void @flat_cluster_acq_rel_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acq_rel monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -9365,7 +9365,7 @@ define amdgpu_kernel void @flat_cluster_seq_cst_monotonic_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") seq_cst monotonic %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -9682,7 +9682,7 @@ define amdgpu_kernel void @flat_cluster_monotonic_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") monotonic acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -9999,7 +9999,7 @@ define amdgpu_kernel void @flat_cluster_acquire_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acquire acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -10342,7 +10342,7 @@ define amdgpu_kernel void @flat_cluster_release_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") release acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -10685,7 +10685,7 @@ define amdgpu_kernel void @flat_cluster_acq_rel_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acq_rel acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -11028,7 +11028,7 @@ define amdgpu_kernel void @flat_cluster_seq_cst_acquire_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") seq_cst acquire %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -11371,7 +11371,7 @@ define amdgpu_kernel void @flat_cluster_monotonic_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds 
i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") monotonic seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -11714,7 +11714,7 @@ define amdgpu_kernel void @flat_cluster_acquire_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acquire seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -12057,7 +12057,7 @@ define amdgpu_kernel void @flat_cluster_release_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") release seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -12400,7 +12400,7 @@ define amdgpu_kernel void @flat_cluster_acq_rel_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") acq_rel seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -12743,7 +12743,7 @@ define amdgpu_kernel void @flat_cluster_seq_cst_seq_cst_ret_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster") seq_cst seq_cst %val0 = extractvalue { i32, i1 } %val, 0 store i32 %val0, ptr %out, align 4 @@ -16259,7 +16259,7 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") monotonic monotonic ret void } @@ -16541,7 +16541,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acquire_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acquire monotonic ret void } @@ -16820,7 +16820,7 @@ define amdgpu_kernel void @flat_cluster_one_as_release_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") release monotonic ret void } @@ -17128,7 +17128,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acq_rel monotonic ret void } @@ -17436,7 +17436,7 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_monotonic_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = 
getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") seq_cst monotonic ret void } @@ -17718,7 +17718,7 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") monotonic acquire ret void } @@ -18000,7 +18000,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acquire_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acquire acquire ret void } @@ -18308,7 +18308,7 @@ define amdgpu_kernel void @flat_cluster_one_as_release_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") release acquire ret void } @@ -18616,7 +18616,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acq_rel acquire ret void } @@ -18924,7 +18924,7 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_acquire_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") seq_cst acquire ret void } @@ -19232,7 +19232,7 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") monotonic seq_cst ret void } @@ -19540,7 +19540,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acquire_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acquire seq_cst ret void } @@ -19848,7 +19848,7 @@ define amdgpu_kernel void @flat_cluster_one_as_release_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") release seq_cst ret void } @@ -20156,7 +20156,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, ptr %out, i32 4 + %gep = getelementptr inbounds i32, ptr %out, i32 4 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acq_rel seq_cst ret void } @@ -20464,7 +20464,7 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_seq_cst_cmpxchg( ; GFX1250-NEXT: s_endpgm ptr 
%out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") seq_cst seq_cst
   ret void
 }
@@ -20763,7 +20763,7 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") monotonic monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -21091,7 +21091,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acquire monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -21418,7 +21418,7 @@ define amdgpu_kernel void @flat_cluster_one_as_release_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") release monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -21772,7 +21772,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acq_rel monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -22126,7 +22126,7 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") seq_cst monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -22454,7 +22454,7 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") monotonic acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -22782,7 +22782,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acquire acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -23136,7 +23136,7 @@ define amdgpu_kernel void @flat_cluster_one_as_release_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") release acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -23490,7 +23490,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acq_rel acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -23844,7 +23844,7 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") seq_cst acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -24198,7 +24198,7 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") monotonic seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -24552,7 +24552,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acquire_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acquire seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -24906,7 +24906,7 @@ define amdgpu_kernel void @flat_cluster_one_as_release_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") release seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -25260,7 +25260,7 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") acq_rel seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -25614,7 +25614,7 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("cluster-one-as") seq_cst seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
index a05f4c718c351..7d357922ac307 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
@@ -3094,7 +3094,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
   ret void
 }
@@ -3347,7 +3347,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
   ret void
 }
@@ -3600,7 +3600,7 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
   ret void
 }
@@ -3853,7 +3853,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
   ret void
 }
@@ -4106,7 +4106,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
   ret void
 }
@@ -4359,7 +4359,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire
   ret void
 }
@@ -4612,7 +4612,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
   ret void
 }
@@ -4865,7 +4865,7 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
   ret void
 }
@@ -5118,7 +5118,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
   ret void
 }
@@ -5371,7 +5371,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
   ret void
 }
@@ -5624,7 +5624,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst
   ret void
 }
@@ -5877,7 +5877,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst
   ret void
 }
@@ -6130,7 +6130,7 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst
   ret void
 }
@@ -6383,7 +6383,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst
   ret void
 }
@@ -6636,7 +6636,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
   ret void
 }
@@ -6935,7 +6935,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -7236,7 +7236,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -7537,7 +7537,7 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -7838,7 +7838,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -8139,7 +8139,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -8440,7 +8440,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -8741,7 +8741,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -9042,7 +9042,7 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -9343,7 +9343,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -9644,7 +9644,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -9945,7 +9945,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") monotonic seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -10246,7 +10246,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acquire seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -10547,7 +10547,7 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") release seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -10848,7 +10848,7 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") acq_rel seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -11149,7 +11149,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread") seq_cst seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -14237,7 +14237,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic
   ret void
 }
@@ -14490,7 +14490,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic
   ret void
 }
@@ -14743,7 +14743,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic
   ret void
 }
@@ -14996,7 +14996,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic
   ret void
 }
@@ -15249,7 +15249,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic
   ret void
 }
@@ -15502,7 +15502,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire
   ret void
 }
@@ -15755,7 +15755,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire
   ret void
 }
@@ -16008,7 +16008,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire
   ret void
 }
@@ -16261,7 +16261,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire
   ret void
 }
@@ -16514,7 +16514,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire
   ret void
 }
@@ -16767,7 +16767,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst
   ret void
 }
@@ -17020,7 +17020,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst
   ret void
 }
@@ -17273,7 +17273,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst
   ret void
 }
@@ -17526,7 +17526,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst
   ret void
 }
@@ -17779,7 +17779,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst
   ret void
 }
@@ -18078,7 +18078,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_ret_cmpx
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -18379,7 +18379,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_ret_cmpxch
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -18680,7 +18680,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_ret_cmpxch
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -18981,7 +18981,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxch
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -19282,7 +19282,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxch
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -19583,7 +19583,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_ret_cmpxch
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -19884,7 +19884,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -20185,7 +20185,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -20486,7 +20486,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -20787,7 +20787,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -21088,7 +21088,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxch
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") monotonic seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -21389,7 +21389,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acquire seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -21690,7 +21690,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") release seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -21991,7 +21991,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") acq_rel seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -22292,7 +22292,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("singlethread-one-as") seq_cst seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
index 74065146fd385..d5b37650ae9cc 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
@@ -3530,7 +3530,7 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic monotonic
   ret void
 }
@@ -3818,7 +3818,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire monotonic
   ret void
 }
@@ -4102,7 +4102,7 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release monotonic
   ret void
 }
@@ -4421,7 +4421,7 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel monotonic
   ret void
 }
@@ -4740,7 +4740,7 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst monotonic
   ret void
 }
@@ -5028,7 +5028,7 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic acquire
   ret void
 }
@@ -5316,7 +5316,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire acquire
   ret void
 }
@@ -5635,7 +5635,7 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release acquire
   ret void
 }
@@ -5954,7 +5954,7 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel acquire
   ret void
 }
@@ -6273,7 +6273,7 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst acquire
   ret void
 }
@@ -6592,7 +6592,7 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic seq_cst
   ret void
 }
@@ -6911,7 +6911,7 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire seq_cst
   ret void
 }
@@ -7230,7 +7230,7 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release seq_cst
   ret void
 }
@@ -7549,7 +7549,7 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel seq_cst
   ret void
 }
@@ -7868,7 +7868,7 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst seq_cst
   ret void
 }
@@ -8167,7 +8167,7 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -8486,7 +8486,7 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -8818,7 +8818,7 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -9168,7 +9168,7 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -9518,7 +9518,7 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -9837,7 +9837,7 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -10156,7 +10156,7 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -10506,7 +10506,7 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -10856,7 +10856,7 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -11206,7 +11206,7 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -11556,7 +11556,7 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in monotonic seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -11906,7 +11906,7 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acquire seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -12256,7 +12256,7 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in release seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -12606,7 +12606,7 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in acq_rel seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -12956,7 +12956,7 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in seq_cst seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -16523,7 +16523,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic
   ret void
 }
@@ -16807,7 +16807,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic
   ret void
 }
@@ -17091,7 +17091,7 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release monotonic
   ret void
 }
@@ -17406,7 +17406,7 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic
   ret void
 }
@@ -17721,7 +17721,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic
   ret void
 }
@@ -18005,7 +18005,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire
   ret void
 }
@@ -18289,7 +18289,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire
   ret void
 }
@@ -18604,7 +18604,7 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release acquire
   ret void
 }
@@ -18919,7 +18919,7 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire
   ret void
 }
@@ -19234,7 +19234,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire
   ret void
 }
@@ -19549,7 +19549,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst
   ret void
 }
@@ -19864,7 +19864,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst
   ret void
 }
@@ -20179,7 +20179,7 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst
   ret void
 }
@@ -20494,7 +20494,7 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst
   ret void
 }
@@ -20809,7 +20809,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
   ret void
 }
@@ -21108,7 +21108,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -21438,7 +21438,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -21770,7 +21770,7 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -22131,7 +22131,7 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -22492,7 +22492,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxch
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -22822,7 +22822,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -23152,7 +23152,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -23513,7 +23513,7 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -23874,7 +23874,7 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -24235,7 +24235,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -24596,7 +24596,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") monotonic seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -24957,7 +24957,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acquire seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -25318,7 +25318,7 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") release seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -25679,7 +25679,7 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") acq_rel seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -26040,7 +26040,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("one-as") seq_cst seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
index 8734e7152e281..b8e324ff5f458 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
@@ -3094,7 +3094,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
   ret void
 }
@@ -3347,7 +3347,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
   ret void
 }
@@ -3600,7 +3600,7 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
   ret void
 }
@@ -3853,7 +3853,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
   ret void
 }
@@ -4106,7 +4106,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
   ret void
 }
@@ -4359,7 +4359,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire
   ret void
 }
@@ -4612,7 +4612,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
   ret void
 }
@@ -4865,7 +4865,7 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
   ret void
 }
@@ -5118,7 +5118,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
   ret void
 }
@@ -5371,7 +5371,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
   ret void
 }
@@ -5624,7 +5624,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst
   ret void
 }
@@ -5877,7 +5877,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst
   ret void
 }
@@ -6130,7 +6130,7 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst
   ret void
 }
@@ -6383,7 +6383,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst
   ret void
 }
@@ -6636,7 +6636,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
   ret void
 }
@@ -6935,7 +6935,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -7236,7 +7236,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -7537,7 +7537,7 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -7838,7 +7838,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -8139,7 +8139,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -8440,7 +8440,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -8741,7 +8741,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -9042,7 +9042,7 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -9343,7 +9343,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -9644,7 +9644,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -9945,7 +9945,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") monotonic seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -10246,7 +10246,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acquire seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -10547,7 +10547,7 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") release seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -10848,7 +10848,7 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") acq_rel seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -11149,7 +11149,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront") seq_cst seq_cst
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -14237,7 +14237,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic
   ret void
 }
@@ -14490,7 +14490,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic
   ret void
 }
@@ -14743,7 +14743,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release monotonic
   ret void
 }
@@ -14996,7 +14996,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic
   ret void
 }
@@ -15249,7 +15249,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic
   ret void
 }
@@ -15502,7 +15502,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire
   ret void
 }
@@ -15755,7 +15755,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire
   ret void
 }
@@ -16008,7 +16008,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release acquire
   ret void
 }
@@ -16261,7 +16261,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire
   ret void
 }
@@ -16514,7 +16514,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire
   ret void
 }
@@ -16767,7 +16767,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst
   ret void
 }
@@ -17020,7 +17020,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst
   ret void
 }
@@ -17273,7 +17273,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst
   ret void
 }
@@ -17526,7 +17526,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst
   ret void
 }
@@ -17779,7 +17779,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst
   ret void
 }
@@ -18078,7 +18078,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -18379,7 +18379,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -18680,7 +18680,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -18981,7 +18981,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst monotonic
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -19282,7 +19282,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -19583,7 +19583,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire acquire
   %val0 = extractvalue { i32, i1 } %val, 0
   store i32 %val0, ptr %out, align 4
@@ -19884,7 +19884,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_ret_cmpxchg(
 ; GFX1250-NEXT:    s_endpgm
     ptr %out, i32 %in, i32 %old) {
 entry:
-  %gep = getelementptr i32, ptr %out, i32 4
+  %gep = getelementptr inbounds i32, ptr %out, i32 4
   %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -20185,7 +20185,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel acquire
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -20486,7 +20486,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst acquire
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -20787,7 +20787,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") monotonic seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -21088,7 +21088,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acquire seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -21389,7 +21389,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") release seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -21690,7 +21690,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") acq_rel seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -21991,7 +21991,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("wavefront-one-as") seq_cst seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
index d384aec2a2b19..d44e7fff2359f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
@@ -3401,7 +3401,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
 ret void
 }
@@ -3674,7 +3674,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
 ret void
 }
@@ -3951,7 +3951,7 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
 ret void
 }
@@ -4250,7 +4250,7 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
 ret void
 }
@@ -4549,7 +4549,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
 ret void
 }
@@ -4822,7 +4822,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire
 ret void
 }
@@ -5095,7 +5095,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
 ret void
 }
@@ -5394,7 +5394,7 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
 ret void
 }
@@ -5693,7 +5693,7 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
 ret void
 }
@@ -5992,7 +5992,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
 ret void
 }
@@ -6291,7 +6291,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
 ret void
 }
@@ -6590,7 +6590,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic monotonic
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -6903,7 +6903,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire monotonic
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -7228,7 +7228,7 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release monotonic
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -7562,7 +7562,7 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel monotonic
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -7896,7 +7896,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst monotonic
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -8209,7 +8209,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic acquire
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -8522,7 +8522,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire acquire
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -8856,7 +8856,7 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release acquire
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -9190,7 +9190,7 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel acquire
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -9524,7 +9524,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst acquire
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -9861,7 +9861,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") monotonic seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -10198,7 +10198,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acquire seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -10532,7 +10532,7 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") release seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -10866,7 +10866,7 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") acq_rel seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -11200,7 +11200,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup") seq_cst seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -14536,7 +14536,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_monotonic_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic
 ret void
 }
@@ -14800,7 +14800,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic
 ret void
 }
@@ -15073,7 +15073,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic
 ret void
 }
@@ -15360,7 +15360,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic
 ret void
 }
@@ -15647,7 +15647,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic
 ret void
 }
@@ -15911,7 +15911,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire
 ret void
 }
@@ -16175,7 +16175,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire
 ret void
 }
@@ -16462,7 +16462,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire
 ret void
 }
@@ -16749,7 +16749,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire
 ret void
 }
@@ -17036,7 +17036,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire
 ret void
 }
@@ -17320,7 +17320,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst
 ret void
 }
@@ -17604,7 +17604,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst
 ret void
 }
@@ -17891,7 +17891,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst
 ret void
 }
@@ -18178,7 +18178,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst
 ret void
 }
@@ -18465,7 +18465,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst
 ret void
 }
@@ -18764,7 +18764,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic monotonic
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -19073,7 +19073,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire monotonic
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -19394,7 +19394,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release monotonic
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -19726,7 +19726,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel monotonic
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -20058,7 +20058,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst monotonic
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -20367,7 +20367,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic acquire
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -20676,7 +20676,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire acquire
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -21008,7 +21008,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release acquire
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -21340,7 +21340,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel acquire
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -21672,7 +21672,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst acquire
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -22001,7 +22001,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") monotonic seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -22330,7 +22330,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acquire seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -22662,7 +22662,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") release seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -22994,7 +22994,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") acq_rel seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
@@ -23326,7 +23326,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ; GFX1250-NEXT: s_endpgm
 ptr %out, i32 %in, i32 %old) {
 entry:
- %gep = getelementptr i32, ptr %out, i32 4
+ %gep = getelementptr inbounds i32, ptr %out, i32 4
 %val = cmpxchg volatile ptr %gep, i32 %old, i32 %in syncscope("workgroup-one-as") seq_cst seq_cst
 %val0 = extractvalue { i32, i1 } %val, 0
 store i32 %val0, ptr %out, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
index e50ed3ee95140..4d189d7a9673d 100644
--- a/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/offset-split-flat.ll
@@ -1492,7 +1492,7 @@ define i8 @flat_inst_valu_offset_2x_neg_24bit_max(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -16777215
+ %gep = getelementptr inbounds i8, ptr %p, i64 -16777215
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -1597,7 +1597,7 @@ define i8 @flat_inst_valu_offset_64bit_11bit_split0(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 8589936639
+ %gep = getelementptr inbounds i8, ptr %p, i64 8589936639
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -1702,7 +1702,7 @@ define i8 @flat_inst_valu_offset_64bit_11bit_split1(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 8589936640
+ %gep = getelementptr inbounds i8, ptr %p, i64 8589936640
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -1807,7 +1807,7 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split0(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 8589938687
+ %gep = getelementptr inbounds i8, ptr %p, i64 8589938687
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -1903,7 +1903,7 @@ define i8 @flat_inst_valu_offset_64bit_12bit_split1(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 8589938688
+ %gep = getelementptr inbounds i8, ptr %p, i64 8589938688
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -2008,7 +2008,7 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split0(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 8589942783
+ %gep = getelementptr inbounds i8, ptr %p, i64 8589942783
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -2104,7 +2104,7 @@ define i8 @flat_inst_valu_offset_64bit_13bit_split1(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 8589942784
+ %gep = getelementptr inbounds i8, ptr %p, i64 8589942784
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -2211,7 +2211,7 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -9223372036854773761
+ %gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854773761
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -2318,7 +2318,7 @@ define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -9223372036854773760
+ %gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854773760
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -2425,7 +2425,7 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -9223372036854771713
+ %gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854771713
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -2532,7 +2532,7 @@ define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -9223372036854771712
+ %gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854771712
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -2639,7 +2639,7 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -9223372036854767617
+ %gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854767617
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -2746,7 +2746,7 @@ define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(ptr %p) {
 ; GFX12-GISEL-NEXT: flat_load_u8 v0, v[0:1]
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- %gep = getelementptr i8, ptr %p, i64 -9223372036854767616
+ %gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854767616
 %load = load i8, ptr %gep, align 4
 ret i8 %load
 }
@@ -4232,7 +4232,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 8589936639
+ %gep = getelementptr inbounds i8, ptr %p, i64 8589936639
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -4351,7 +4351,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 8589936640
+ %gep = getelementptr inbounds i8, ptr %p, i64 8589936640
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -4470,7 +4470,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 8589938687
+ %gep = getelementptr inbounds i8, ptr %p, i64 8589938687
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -4590,7 +4590,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split1(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 8589938688
+ %gep = getelementptr inbounds i8, ptr %p, i64 8589938688
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -4710,7 +4710,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 8589942783
+ %gep = getelementptr inbounds i8, ptr %p, i64 8589942783
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -4830,7 +4830,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(ptr %p) {
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 8589942784
+ %gep = getelementptr inbounds i8, ptr %p, i64 8589942784
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -4955,7 +4955,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(ptr
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 -9223372036854773761
+ %gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854773761
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -5080,7 +5080,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(ptr
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 -9223372036854773760
+ %gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854773760
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -5205,7 +5205,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(ptr
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 -9223372036854771713
+ %gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854771713
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -5330,7 +5330,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(ptr
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 -9223372036854771712
+ %gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854771712
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -5455,7 +5455,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(ptr
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 -9223372036854767617
+ %gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854767617
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
@@ -5580,7 +5580,7 @@ define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(ptr
 ; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
 ; GFX12-GISEL-NEXT: flat_store_b8 v[0:1], v0
 ; GFX12-GISEL-NEXT: s_endpgm
- %gep = getelementptr i8, ptr %p, i64 -9223372036854767616
+ %gep = getelementptr inbounds i8, ptr %p, i64 -9223372036854767616
 %load = load volatile i8, ptr %gep, align 1
 store i8 %load, ptr poison
 ret void
diff --git a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
index 85a9aba1a0e51..57eb25700c76a 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-constOffset-to-imm.ll
@@ -2658,7 +2658,7 @@ define amdgpu_kernel void @negativeoffsetnullptr(ptr %buffer) {
 ; GFX11-FAKE16-NEXT: s_endpgm
 entry:
 %null = select i1 false, ptr %buffer, ptr addrspacecast (ptr addrspace(5) null to ptr)
- %gep = getelementptr i8, ptr %null, i64 -1
+ %gep = getelementptr inbounds i8, ptr %null, i64 -1
 %ld = load i8, ptr %gep
 %cmp = icmp eq i8 %ld, 0
 br label %branch