diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 07b331d713570e..97d5ea31e3fb80 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -267,7 +267,8 @@ def select_constant_cmp: GICombineRule< // Fold x op 0 -> x def right_identity_zero: GICombineRule< (defs root:$root), - (match (wip_match_opcode G_SUB, G_ADD, G_OR, G_XOR, G_SHL, G_ASHR, G_LSHR):$root, + (match (wip_match_opcode G_SUB, G_ADD, G_OR, G_XOR, G_SHL, G_ASHR, G_LSHR, + G_PTR_ADD):$root, [{ return Helper.matchConstantOp(${root}->getOperand(2), 0); }]), (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }]) >; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir index 6312b68dd8d50b..55e63ba01d35e8 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-trivial-arith.mir @@ -307,3 +307,21 @@ body: | $x0 = COPY %mul(s64) RET_ReallyLR implicit $x0 ... +--- +name: right_ident_ptr_add +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $x0 + ; Fold (x + 0) -> x + ; + ; CHECK-LABEL: name: right_ident_ptr_add + ; CHECK: liveins: $x0 + ; CHECK: %x:_(p0) = COPY $x0 + ; CHECK: $x0 = COPY %x(p0) + ; CHECK: RET_ReallyLR implicit $x0 + %x:_(p0) = COPY $x0 + %cst:_(s64) = G_CONSTANT i64 0 + %op:_(p0) = G_PTR_ADD %x(p0), %cst + $x0 = COPY %op(p0) + RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index 53b9649f7e336c..794fa1935e58e9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -178,18 +178,18 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-NEXT: s_add_u32 s2, 4, 0 -; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-NEXT: s_add_u32 s1, 0x104, s1 -; GFX9-NEXT: scratch_load_dword v1, off, s2 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 s0, 0x104, s0 +; GFX9-NEXT: v_mov_b32_e32 v0, 15 +; GFX9-NEXT: s_add_u32 s1, 0x104, s1 ; GFX9-NEXT: scratch_store_dword off, v0, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_add_u32 s0, 0x104, s0 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -201,8 +201,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 -; GFX10-NEXT: s_add_u32 s1, 4, 0 -; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc +; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, 15 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -237,8 +236,8 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_add_u32 s0, 4, 0 -; GFX9-NEXT: scratch_load_dword v1, off, s0 glc +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 @@ -263,11 +262,10 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x104 ; GFX10-NEXT: v_mov_b32_e32 v3, 15 -; GFX10-NEXT: s_add_u32 s0, 4, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 -; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc +; GFX10-NEXT: scratch_load_dword v2, off, off offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: scratch_store_dword v0, v3, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -296,8 +294,7 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { ; GFX9-LABEL: store_load_vindex_small_offset_foo: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_add_u32 s0, s32, 0 -; GFX9-NEXT: scratch_load_dword v1, off, s0 glc +; GFX9-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x100 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 @@ -323,10 +320,9 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { ; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v3, 15 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 -; GFX10-NEXT: s_add_u32 s0, s32, 0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 -; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc +; GFX10-NEXT: scratch_load_dword v2, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: scratch_store_dword v0, v3, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -355,18 +351,18 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX9-NEXT: s_load_dword s0, s[0:1], 0x24 ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s2, s5 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 -; GFX9-NEXT: s_add_u32 s2, 4, 0 -; GFX9-NEXT: v_mov_b32_e32 v0, 15 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: scratch_load_dword v0, off, vcc_hi offset:4 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_lshl_b32 s1, s0, 2 ; GFX9-NEXT: s_and_b32 s0, s0, 15 ; GFX9-NEXT: s_lshl_b32 s0, s0, 2 -; GFX9-NEXT: s_add_u32 s1, 0x4004, s1 -; GFX9-NEXT: scratch_load_dword v1, off, s2 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_add_u32 s0, 0x4004, s0 +; GFX9-NEXT: v_mov_b32_e32 v0, 15 +; GFX9-NEXT: s_add_u32 s1, 0x4004, s1 ; GFX9-NEXT: scratch_store_dword off, v0, s1 ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_add_u32 s0, 0x4004, s0 ; GFX9-NEXT: scratch_load_dword v0, off, s0 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_endpgm @@ -378,8 +374,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 ; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 ; GFX10-NEXT: s_load_dword s0, s[0:1], 0x24 -; GFX10-NEXT: s_add_u32 s1, 4, 0 -; GFX10-NEXT: scratch_load_dword v0, off, s1 glc dlc +; GFX10-NEXT: scratch_load_dword v0, off, off offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, 15 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) @@ -414,8 +409,8 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 -; GFX9-NEXT: s_add_u32 s0, 4, 0 -; GFX9-NEXT: scratch_load_dword v1, off, s0 glc +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: scratch_load_dword v1, off, vcc_hi offset:4 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 ; GFX9-NEXT: v_sub_u32_e32 v0, 0, v0 @@ -440,11 +435,10 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { ; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX10-NEXT: v_mov_b32_e32 v2, 0x4004 ; GFX10-NEXT: v_mov_b32_e32 v3, 15 -; GFX10-NEXT: s_add_u32 s0, 4, 0 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 -; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc +; GFX10-NEXT: scratch_load_dword v2, off, off offset:4 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: scratch_store_dword v0, v3, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -473,8 +467,7 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX9-LABEL: store_load_vindex_large_offset_foo: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_add_u32 s0, s32, 0 -; GFX9-NEXT: scratch_load_dword v1, off, s0 glc +; GFX9-NEXT: scratch_load_dword v1, off, s32 glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_add_u32 vcc_hi, s32, 0x4000 ; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0 @@ -500,10 +493,9 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { ; GFX10-NEXT: v_mov_b32_e32 v2, vcc_lo ; GFX10-NEXT: v_mov_b32_e32 v3, 15 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v1 -; GFX10-NEXT: s_add_u32 s0, s32, 0 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v2, v1 -; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc +; GFX10-NEXT: scratch_load_dword v2, off, s32 glc dlc ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: scratch_store_dword v0, v3, off ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -531,11 +523,11 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s3 ; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_movk_i32 s0, 0x3e80 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 -; GFX9-NEXT: s_add_u32 s0, 4, 0 -; GFX9-NEXT: scratch_store_dword off, v0, s0 +; GFX9-NEXT: s_mov_b32 vcc_hi, 0 +; GFX9-NEXT: scratch_store_dword off, v0, vcc_hi offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_movk_i32 s0, 0x3e80 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: s_add_u32 s0, 4, s0 ; GFX9-NEXT: scratch_store_dword off, v0, s0 @@ -553,9 +545,8 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3e80 -; GFX10-NEXT: s_add_u32 s1, 4, 0 ; GFX10-NEXT: s_add_u32 s0, 4, s0 -; GFX10-NEXT: scratch_store_dword off, v0, s1 +; GFX10-NEXT: scratch_store_dword off, v0, off offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 @@ -577,11 +568,10 @@ define void @store_load_large_imm_offset_foo() { ; GFX9-LABEL: store_load_large_imm_offset_foo: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_movk_i32 s0, 0x3e80 ; GFX9-NEXT: v_mov_b32_e32 v0, 13 -; GFX9-NEXT: s_add_u32 s0, s32, 0 -; GFX9-NEXT: scratch_store_dword off, v0, s0 +; GFX9-NEXT: scratch_store_dword off, v0, s32 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_movk_i32 s0, 0x3e80 ; GFX9-NEXT: v_mov_b32_e32 v0, 15 ; GFX9-NEXT: s_add_u32 s0, s32, s0 ; GFX9-NEXT: scratch_store_dword off, v0, s0 @@ -597,9 +587,8 @@ define void @store_load_large_imm_offset_foo() { ; GFX10-NEXT: v_mov_b32_e32 v0, 13 ; GFX10-NEXT: v_mov_b32_e32 v1, 15 ; GFX10-NEXT: s_movk_i32 s0, 0x3e80 -; GFX10-NEXT: s_add_u32 s1, s32, 0 ; GFX10-NEXT: s_add_u32 s0, s32, s0 -; GFX10-NEXT: scratch_store_dword off, v0, s1 +; GFX10-NEXT: scratch_store_dword off, v0, s32 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: scratch_store_dword off, v1, s0 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0