Skip to content

Commit

Permalink
GlobalISel: Fix CSEMIRBuilder mishandling constant folds of vectors
Browse files Browse the repository at this point in the history
This was ignoring the requested result register, resulting in a
missing def when this happened in the IRTranslator. Fixes some crashes
and verifier errors at -O0.

Alternatively we could pass DstOps to the constant fold functions.
  • Loading branch information
arsenm committed Jan 18, 2022
1 parent ec47dba commit da72822
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 19 deletions.
7 changes: 4 additions & 3 deletions llvm/include/llvm/CodeGen/GlobalISel/Utils.h
Expand Up @@ -270,9 +270,10 @@ Optional<APFloat> ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
/// If successful, returns the G_BUILD_VECTOR representing the folded vector
/// constant. \p MIB should have an insertion point already set to create new
/// G_CONSTANT instructions as needed.
Optional<MachineInstr *>
ConstantFoldVectorBinop(unsigned Opcode, const Register Op1, const Register Op2,
const MachineRegisterInfo &MRI, MachineIRBuilder &MIB);
Register ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
const Register Op2,
const MachineRegisterInfo &MRI,
MachineIRBuilder &MIB);

Optional<APInt> ConstantFoldExtOp(unsigned Opcode, const Register Op1,
uint64_t Imm, const MachineRegisterInfo &MRI);
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
Expand Up @@ -191,10 +191,10 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
assert(DstOps.size() == 1 && "Invalid dsts");
if (SrcOps[0].getLLTTy(*getMRI()).isVector()) {
// Try to constant fold vector constants.
auto VecCst = ConstantFoldVectorBinop(
Register VecCst = ConstantFoldVectorBinop(
Opc, SrcOps[0].getReg(), SrcOps[1].getReg(), *getMRI(), *this);
if (VecCst)
return MachineInstrBuilder(getMF(), *VecCst);
return buildCopy(DstOps[0], VecCst);
break;
}
if (Optional<APInt> Cst = ConstantFoldBinOp(Opc, SrcOps[0].getReg(),
Expand Down
17 changes: 8 additions & 9 deletions llvm/lib/CodeGen/GlobalISel/Utils.cpp
Expand Up @@ -592,17 +592,16 @@ Optional<APFloat> llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
return None;
}

Optional<MachineInstr *>
llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
const Register Op2,
const MachineRegisterInfo &MRI,
MachineIRBuilder &MIB) {
Register llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
const Register Op2,
const MachineRegisterInfo &MRI,
MachineIRBuilder &MIB) {
auto *SrcVec1 = getOpcodeDef<GBuildVector>(Op1, MRI);
if (!SrcVec1)
return None;
return Register();
auto *SrcVec2 = getOpcodeDef<GBuildVector>(Op2, MRI);
if (!SrcVec2)
return None;
return Register();

const LLT EltTy = MRI.getType(SrcVec1->getSourceReg(0));

Expand All @@ -611,14 +610,14 @@ llvm::ConstantFoldVectorBinop(unsigned Opcode, const Register Op1,
auto MaybeCst = ConstantFoldBinOp(Opcode, SrcVec1->getSourceReg(Idx),
SrcVec2->getSourceReg(Idx), MRI);
if (!MaybeCst)
return None;
return Register();
auto FoldedCstReg = MIB.buildConstant(EltTy, *MaybeCst).getReg(0);
FoldedElements.emplace_back(FoldedCstReg);
}
// Create the new vector constant.
auto CstVec =
MIB.buildBuildVector(MRI.getType(SrcVec1->getReg(0)), FoldedElements);
return &*CstVec;
return CstVec.getReg(0);
}

bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
Expand Down
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll
@@ -0,0 +1,8 @@
; RUN: llc -O0 -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s

define amdgpu_kernel void @test_long_add4() {
entry:
%add = add <4 x i64> zeroinitializer, zeroinitializer
store <4 x i64> %add, <4 x i64> addrspace(1)* null, align 32
ret void
}
@@ -0,0 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -o - %s | FileCheck %s

; The CSEMIRBuilder was mishandling the result operand when constant
; folding a vector operation, resulting in a missing def for the
; stored value.
define amdgpu_kernel void @constant_fold_vector_add() {
; CHECK-LABEL: name: constant_fold_vector_add
; CHECK: bb.1.entry:
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64), [[C]](s64), [[C]](s64)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64), [[C2]](s64), [[C2]](s64)
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY [[BUILD_VECTOR1]](<4 x s64>)
; CHECK-NEXT: G_STORE [[COPY]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* null`, addrspace 1)
; CHECK-NEXT: S_ENDPGM 0
entry:
%add = add <4 x i64> zeroinitializer, zeroinitializer
store <4 x i64> %add, <4 x i64> addrspace(1)* null, align 32
ret void
}
Expand Up @@ -195,15 +195,16 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 ad
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C3]](s64)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[BUILD_VECTOR4]](<2 x s64>)
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY9]](<2 x p1>)
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s64>) = COPY [[BUILD_VECTOR4]](<2 x s64>)
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(<2 x p1>) = G_PTR_ADD [[BUILD_VECTOR]], [[COPY9]](<2 x s64>)
; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(<2 x p1>) = COPY [[PTR_ADD]](<2 x p1>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY10]](<2 x p1>)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32)
; CHECK-NEXT: [[COPY10:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY10]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
; CHECK-NEXT: [[COPY11:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY11]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
%gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> <i64 1, i64 2>
ret <2 x i32 addrspace(1)*> %gep
}

0 comments on commit da72822

Please sign in to comment.