[GlobalISel] Implement some binary reassociations, G_ADD for now
- (op (op X, C1), C2) -> (op X, (op C1, C2))
- (op (op X, C1), Y) -> (op (op X, Y), C1)

There is some code duplication with the G_PTR_ADD reassociations, unfortunately,
but I don't see an easy way to avoid it.
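
For example (a sketch in generic MIR, in the spirit of the new AArch64 tests
below; the register names and constant values are illustrative only):

  ; (op (op X, C1), C2) -> (op X, (op C1, C2)): fold the two constants.
  %inner:_(s64) = G_ADD %x, %c64          ; %c64 = G_CONSTANT i64 64
  %outer:_(s64) = G_ADD %inner, %c24      ; %c24 = G_CONSTANT i64 24
  ; becomes
  %outer:_(s64) = G_ADD %x, %c88          ; %c88 = G_CONSTANT i64 88

  ; (op (op X, C1), Y) -> (op (op X, Y), C1): push the constant outward,
  ; provided the inner add has a single use.
  %inner:_(s64) = G_ADD %x, %c64
  %outer:_(s64) = G_ADD %inner, %y
  ; becomes
  %sum:_(s64)   = G_ADD %x, %y
  %outer:_(s64) = G_ADD %sum, %c64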

Differential Revision: https://reviews.llvm.org/D150230
aemerson committed Jun 9, 2023
1 parent 3004e9f commit 086601e
Showing 11 changed files with 264 additions and 39 deletions.
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -631,6 +631,12 @@ class CombinerHelper {
/// addressing mode usage.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo);

/// Try to reassociate the operands of a commutative binop.
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0,
Register Op1, BuildFnTy &MatchInfo);
/// Reassociate commutative binary operations like G_ADD.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo);

/// Do constant folding when opportunities are exposed after MIR building.
bool matchConstantFold(MachineInstr &MI, APInt &MatchInfo);

12 changes: 12 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -3584,6 +3585,17 @@ class TargetLowering : public TargetLoweringBase {
return N0.hasOneUse();
}

// Lets the target control the following reassociation of operands:
// (op (op x, c1), y) -> (op (op x, y), c1), where N0 is (op x, c1) and N1 is
// y. By default, any case where N0 has a single use is considered profitable;
// this mirrors the condition that this hook call replaced in the combiner.
// A target can implement its own heuristic to further restrict the combine.
virtual bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
Register N1) const {
return MRI.hasOneNonDBGUse(N0);
}

virtual bool isSDNodeAlwaysUniform(const SDNode * N) const {
return false;
}
8 changes: 7 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -862,7 +862,13 @@ def reassoc_ptradd : GICombineRule<
[{ return Helper.matchReassocPtrAdd(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;

def reassocs : GICombineGroup<[reassoc_ptradd]>;
def reassoc_comm_binops : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_ADD $root, $src1, $src2):$root,
[{ return Helper.matchReassocCommBinOp(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;

def reassocs : GICombineGroup<[reassoc_ptradd, reassoc_comm_binops]>;

// Constant fold operations.
def constant_fold : GICombineRule<
52 changes: 52 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4501,6 +4501,58 @@ bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,

return false;
}

bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
Register OpLHS, Register OpRHS,
BuildFnTy &MatchInfo) {
LLT OpRHSTy = MRI.getType(OpRHS);
MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);

if (OpLHSDef->getOpcode() != Opc)
return false;

MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();

if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI)) {
if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
// (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
MatchInfo = [=](MachineIRBuilder &B) {
auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
};
return true;
}
if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS) &&
MRI.hasOneNonDBGUse(OpLHSLHS)) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
MatchInfo = [=](MachineIRBuilder &B) {
auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
};
return true;
}
}

return false;
}

bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
BuildFnTy &MatchInfo) {
// We don't check if the reassociation will break a legal addressing mode
// here since pointer arithmetic is handled by G_PTR_ADD.
unsigned Opc = MI.getOpcode();
Register DstReg = MI.getOperand(0).getReg();
Register LHSReg = MI.getOperand(1).getReg();
Register RHSReg = MI.getOperand(2).getReg();

if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
return true;
if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
return true;
return false;
}

bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
Register Op1 = MI.getOperand(1).getReg();
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5158,6 +5158,11 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
}
}

bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
Register N0, Register N1) const {
return true; // FIXME: handle regbanks
}

TargetLowering::AtomicExpansionKind
AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
switch (RMW->getOperation()) {
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -292,6 +292,9 @@ class AMDGPUTargetLowering : public TargetLowering {
bool SNaN = false,
unsigned Depth = 0) const override;

bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
Register N1) const override;

/// Helper function that adds Reg to the LiveIn list of the DAG's
/// MachineFunction.
///
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -13572,6 +13572,11 @@ bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
hasMemSDNodeUser(*N0->use_begin()));
}

bool SITargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
Register N0, Register N1) const {
return true; // FIXME: handle regbanks
}

MachineMemOperand::Flags
SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
// Propagate metadata set by AMDGPUAnnotateUniformValues to the MMO of a load.
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -495,6 +495,9 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
SDValue N1) const override;

bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
Register N1) const override;

bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
unsigned MaxDepth = 5) const;
bool isCanonicalized(Register Reg, MachineFunction &MF,
@@ -0,0 +1,124 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s

# Combines: (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
---
name: test1_add_move_inner_cst_to_fold
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
body: |
bb.1:
liveins: $x0
; CHECK-LABEL: name: test1_add_move_inner_cst_to_fold
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(s64) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 88
; CHECK-NEXT: %add_outer:_(s64) = G_ADD %x, [[C]]
; CHECK-NEXT: $x0 = COPY %add_outer(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%x:_(s64) = COPY $x0
%C1:_(s64) = G_CONSTANT i64 64
%C2:_(s64) = G_CONSTANT i64 24
%add_inner:_(s64) = G_ADD %x, %C1
%add_outer:_(s64) = G_ADD %add_inner, %C2
$x0 = COPY %add_outer
RET_ReallyLR implicit $x0
...

# (op (op x, c1), y) -> (op (op x, y), c1)
---
name: test2_add_move_inner_cst_to_rhs
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
body: |
bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: test2_add_move_inner_cst_to_rhs
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(s64) = COPY $x0
; CHECK-NEXT: %C1:_(s64) = G_CONSTANT i64 64
; CHECK-NEXT: %y:_(s64) = COPY $x1
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD %x, %y
; CHECK-NEXT: %add_outer:_(s64) = G_ADD [[ADD]], %C1
; CHECK-NEXT: $x0 = COPY %add_outer(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%x:_(s64) = COPY $x0
%C1:_(s64) = G_CONSTANT i64 64
%y:_(s64) = COPY $x1
%add_inner:_(s64) = G_ADD %x, %C1
%add_outer:_(s64) = G_ADD %add_inner, %y
$x0 = COPY %add_outer
RET_ReallyLR implicit $x0
...
---
name: test2_add_move_inner_cst_to_rhs_multiuse
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
body: |
bb.1:
liveins: $x0, $x1
; CHECK-LABEL: name: test2_add_move_inner_cst_to_rhs_multiuse
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(s64) = COPY $x0
; CHECK-NEXT: %C1:_(s64) = G_CONSTANT i64 64
; CHECK-NEXT: %y:_(s64) = COPY $x1
; CHECK-NEXT: %add_inner:_(s64) = G_ADD %x, %C1
; CHECK-NEXT: %add_outer:_(s64) = G_ADD %add_inner, %y
; CHECK-NEXT: $x0 = COPY %add_outer(s64)
; CHECK-NEXT: $x1 = COPY %add_inner(s64)
; CHECK-NEXT: RET_ReallyLR implicit $x0
%x:_(s64) = COPY $x0
%C1:_(s64) = G_CONSTANT i64 64
%y:_(s64) = COPY $x1
%add_inner:_(s64) = G_ADD %x, %C1
%add_outer:_(s64) = G_ADD %add_inner, %y
$x0 = COPY %add_outer
$x1 = COPY %add_inner
RET_ReallyLR implicit $x0
...
---
name: test2_add_move_inner_cst_to_rhs_vector
alignment: 4
tracksRegLiveness: true
liveins:
- { reg: '$x0' }
body: |
bb.1:
liveins: $q0, $q1
; CHECK-LABEL: name: test2_add_move_inner_cst_to_rhs_vector
; CHECK: liveins: $q0, $q1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %x:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: %C1:_(s64) = G_CONSTANT i64 64
; CHECK-NEXT: %VEC_C1:_(<2 x s64>) = G_BUILD_VECTOR %C1(s64), %C1(s64)
; CHECK-NEXT: %y:_(<2 x s64>) = COPY $q1
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<2 x s64>) = G_ADD %x, %y
; CHECK-NEXT: %add_outer:_(<2 x s64>) = G_ADD [[ADD]], %VEC_C1
; CHECK-NEXT: $q0 = COPY %add_outer(<2 x s64>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%x:_(<2 x s64>) = COPY $q0
%C1:_(s64) = G_CONSTANT i64 64
%VEC_C1:_(<2 x s64>) = G_BUILD_VECTOR %C1, %C1
%y:_(<2 x s64>) = COPY $q1
%add_inner:_(<2 x s64>) = G_ADD %x, %VEC_C1
%add_outer:_(<2 x s64>) = G_ADD %add_inner, %y
$q0 = COPY %add_outer
RET_ReallyLR implicit $q0
...
39 changes: 21 additions & 18 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -5016,9 +5016,10 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr
; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX6-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
@@ -5032,9 +5033,10 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr
; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX7-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_sgpr_vgpr
@@ -5048,9 +5050,10 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_sgpr_vgpr(<4 x i32> inr
; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX8-NEXT: [[S_ADD_I32_:%[0-9]+]]:sreg_32 = S_ADD_I32 [[COPY5]], [[S_MOV_B32_]], implicit-def $scc
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY4]], [[REG_SEQUENCE]], [[S_ADD_I32_]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY6]], [[COPY4]], 0, implicit $exec
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.s, 1024
@@ -5071,10 +5074,10 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr
; GFX6-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX6-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GFX6-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX6-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX6-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX7-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
@@ -5088,10 +5091,10 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr
; GFX7-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX7-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX7-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GFX7-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX7-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX7-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX7-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX7-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
; GFX8-LABEL: name: s_buffer_load_f32_offset_add_imm_vgpr_sgpr
@@ -5105,10 +5108,10 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr
; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
; GFX8-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 [[COPY4]], [[COPY6]], 0, implicit $exec
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
; GFX8-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_CO_U32_e64_]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 1024, 0, 0, implicit $exec :: (dereferenceable invariant load (s32))
; GFX8-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%offset.base = add i32 %offset.v, 1024