1,118 changes: 500 additions & 618 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Large diffs are not rendered by default.

48 changes: 44 additions & 4 deletions llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -215,6 +215,48 @@ MachineInstrBuilder MachineIRBuilder::buildMaskLowPtrBits(const DstOp &Res,
return buildPtrMask(Res, Op0, MaskReg);
}

MachineInstrBuilder
MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res,
const SrcOp &Op0) {
LLT ResTy = Res.getLLTTy(*getMRI());
LLT Op0Ty = Op0.getLLTTy(*getMRI());

assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
"Different vector element types");
assert((ResTy.getNumElements() > Op0Ty.getNumElements()) &&
"Op0 has more elements");

auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
SmallVector<Register, 8> Regs;
for (auto Op : Unmerge.getInstr()->defs())
Regs.push_back(Op.getReg());
Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0);
unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size();
for (unsigned i = 0; i < NumberOfPadElts; ++i)
Regs.push_back(Undef);
return buildMerge(Res, Regs);
}

MachineInstrBuilder
MachineIRBuilder::buildDeleteTrailingVectorElements(const DstOp &Res,
const SrcOp &Op0) {
LLT ResTy = Res.getLLTTy(*getMRI());
LLT Op0Ty = Op0.getLLTTy(*getMRI());

assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
"Different vector element types");
assert((ResTy.getNumElements() < Op0Ty.getNumElements()) &&
"Op0 has fewer elements");

SmallVector<Register, 8> Regs;
auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
for (unsigned i = 0; i < ResTy.getNumElements(); ++i)
Regs.push_back(Unmerge.getReg(i));
return buildMerge(Res, Regs);
}

MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
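
Note on the two helpers added above: they package the recurring "pad an odd-sized vector with undef lanes, operate, then trim back" pattern that the AMDGPU changes below rely on. A minimal usage sketch, not part of the patch — it assumes standard GlobalISel headers and an already-positioned MachineIRBuilder, and the function name is hypothetical:

```cpp
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Round-trip a <3 x s16> value through <4 x s16>.
static Register padAndTrim(MachineIRBuilder &B, Register ThreeHalves) {
  const LLT V3S16 = LLT::fixed_vector(3, 16);
  const LLT V4S16 = LLT::fixed_vector(4, 16);
  // Unmerges the source into s16 scalars, appends one G_IMPLICIT_DEF
  // element, and remerges into the wider type.
  Register Wide =
      B.buildPadVectorWithUndefElements(V4S16, ThreeHalves).getReg(0);
  // Unmerges the wide value and remerges only the leading three elements.
  return B.buildDeleteTrailingVectorElements(V3S16, Wide).getReg(0);
}
```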
@@ -613,10 +655,8 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res,
const SrcOp &Op) {
unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
SmallVector<Register, 8> TmpVec;
for (unsigned I = 0; I != NumReg; ++I)
TmpVec.push_back(getMRI()->createGenericVirtualRegister(Res));
return buildUnmerge(TmpVec, Op);
SmallVector<DstOp, 8> TmpVec(NumReg, Res);
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}

MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res,
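
The buildUnmerge cleanup above is behavior-preserving: rather than hand-allocating one generic virtual register per piece, it now builds NumReg identical DstOps and lets buildInstr create the destination registers. A usage sketch under the same assumptions as the previous snippet (Src64 is a hypothetical s64 register):

```cpp
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include <utility>
using namespace llvm;

// Split an s64 value into its two s32 halves via G_UNMERGE_VALUES.
static std::pair<Register, Register> splitS64(MachineIRBuilder &B,
                                              Register Src64) {
  auto Unmerge = B.buildUnmerge(LLT::scalar(32), Src64);
  return {Unmerge.getReg(0), Unmerge.getReg(1)};
}
```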
15 changes: 15 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -923,6 +923,21 @@ LLT llvm::getLCMType(LLT OrigTy, LLT TargetTy) {
return LLT::scalar(LCMSize);
}

LLT llvm::getCoverTy(LLT OrigTy, LLT TargetTy) {
if (!OrigTy.isVector() || !TargetTy.isVector() || OrigTy == TargetTy ||
(OrigTy.getScalarSizeInBits() != TargetTy.getScalarSizeInBits()))
return getLCMType(OrigTy, TargetTy);

unsigned OrigTyNumElts = OrigTy.getNumElements();
unsigned TargetTyNumElts = TargetTy.getNumElements();
if (OrigTyNumElts % TargetTyNumElts == 0)
return OrigTy;

unsigned NumElts = alignTo(OrigTyNumElts, TargetTyNumElts);
return LLT::scalarOrVector(ElementCount::getFixed(NumElts),
OrigTy.getElementType());
}

LLT llvm::getGCDType(LLT OrigTy, LLT TargetTy) {
const unsigned OrigSize = OrigTy.getSizeInBits();
const unsigned TargetSize = TargetTy.getSizeInBits();
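
Worked examples for the new getCoverTy, following directly from the code above (a sketch; the header names are my assumption):

```cpp
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/Support/LowLevelTypeImpl.h"
#include <cassert>
using namespace llvm;

static void coverTyExamples() {
  const LLT S16 = LLT::scalar(16);
  const LLT V2S16 = LLT::fixed_vector(2, 16);
  const LLT V3S16 = LLT::fixed_vector(3, 16);
  const LLT V4S16 = LLT::fixed_vector(4, 16);
  // Same element type, count not a multiple: round the element count up,
  // giving <4 x s16> where getLCMType would give <6 x s16>.
  assert(getCoverTy(V3S16, V2S16) == V4S16);
  // Count already a multiple of the target's: the original type covers.
  assert(getCoverTy(V4S16, V2S16) == V4S16);
  // Scalar target (or mismatched element sizes): falls back to getLCMType.
  assert(getCoverTy(V3S16, S16) == getLCMType(V3S16, S16));
}
```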
53 changes: 38 additions & 15 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -533,15 +533,15 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
.legalFor({S32, S16, V2S16})
.minScalar(0, S16)
.clampMaxNumElements(0, S16, 2)
.clampMaxNumElementsStrict(0, S16, 2)
.widenScalarToNextMultipleOf(0, 32)
.maxScalar(0, S32)
.scalarize(0);

getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT, G_SADDSAT, G_SSUBSAT})
.legalFor({S32, S16, V2S16}) // Clamp modifier
.minScalarOrElt(0, S16)
.clampMaxNumElements(0, S16, 2)
.clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.widenScalarToNextPow2(0, 32)
.lower();
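
The clampMaxNumElements → clampMaxNumElementsStrict swaps in this file track the LegalizerHelper rework: as I read the new rule, the strict variant pads an odd-sized vector with undef lanes up to a multiple of the clamp and then splits it into uniformly typed pieces, so a <3 x s16> operation becomes two <2 x s16> operations rather than a <2 x s16> piece plus an s16 leftover. A hand-written sketch of the equivalent steps using the new builders (the legalizer performs this internally; names and types are illustrative):

```cpp
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Legalize a hypothetical <3 x s16> G_ADD under a strict clamp to 2 elements.
static void splitV3S16Add(MachineIRBuilder &B, Register Dst, Register LHS,
                          Register RHS) {
  const LLT V2S16 = LLT::fixed_vector(2, 16);
  const LLT V4S16 = LLT::fixed_vector(4, 16);
  // Pad both <3 x s16> inputs to <4 x s16> with an undef lane.
  Register L = B.buildPadVectorWithUndefElements(V4S16, LHS).getReg(0);
  Register R = B.buildPadVectorWithUndefElements(V4S16, RHS).getReg(0);
  // Split into uniform <2 x s16> pieces and operate piecewise.
  auto LPieces = B.buildUnmerge(V2S16, L);
  auto RPieces = B.buildUnmerge(V2S16, R);
  Register Lo =
      B.buildAdd(V2S16, LPieces.getReg(0), RPieces.getReg(0)).getReg(0);
  Register Hi =
      B.buildAdd(V2S16, LPieces.getReg(1), RPieces.getReg(1)).getReg(0);
  // Reassemble and drop the padded lane to produce the <3 x s16> result.
  Register Wide = B.buildConcatVectors(V4S16, {Lo, Hi}).getReg(0);
  B.buildDeleteTrailingVectorElements(Dst, Wide);
}
```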
Expand Down Expand Up @@ -712,7 +712,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
}

if (ST.hasVOP3PInsts())
FPOpActions.clampMaxNumElements(0, S16, 2);
FPOpActions.clampMaxNumElementsStrict(0, S16, 2);

FPOpActions
.scalarize(0)
@@ -728,7 +728,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,

getActionDefinitionsBuilder({G_FNEG, G_FABS})
.legalFor(FPTypesPK16)
.clampMaxNumElements(0, S16, 2)
.clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.clampScalar(0, S16, S64);

@@ -965,7 +965,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (ST.has16BitInsts()) {
getActionDefinitionsBuilder(G_BSWAP)
.legalFor({S16, S32, V2S16})
.clampMaxNumElements(0, S16, 2)
.clampMaxNumElementsStrict(0, S16, 2)
// FIXME: Fixing non-power-of-2 before clamp is workaround for
// narrowScalar limitation.
.widenScalarToNextPow2(0)
@@ -1425,6 +1425,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// FIXME: Doesn't handle extract of illegal sizes.
getActionDefinitionsBuilder(Op)
.lowerIf(all(typeIs(LitTyIdx, S16), sizeIs(BigTyIdx, 32)))
.lowerIf([=](const LegalityQuery &Query) {
// Sub-vector(or single element) insert and extract.
// TODO: verify immediate offset here since lower only works with
// whole elements.
const LLT BigTy = Query.Types[BigTyIdx];
return BigTy.isVector();
})
// FIXME: Multiples of 16 should not be legal.
.legalIf([=](const LegalityQuery &Query) {
const LLT BigTy = Query.Types[BigTyIdx];
@@ -1583,7 +1590,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// Prefer to reduce vector widths for 16-bit vectors before lowering, to
// get more vector shift opportunities, since we'll get those when
// expanded.
.fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16));
.clampMaxNumElementsStrict(0, S16, 2);
} else if (ST.has16BitInsts()) {
SextInReg.lowerFor({{S32}, {S64}, {S16}});
} else {
@@ -1605,14 +1612,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
getActionDefinitionsBuilder(G_FSHR)
.legalFor({{S32, S32}})
.lowerFor({{V2S16, V2S16}})
.fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
.clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.lower();

if (ST.hasVOP3PInsts()) {
getActionDefinitionsBuilder(G_FSHL)
.lowerFor({{V2S16, V2S16}})
.fewerElementsIf(elementTypeIs(0, S16), changeTo(0, V2S16))
.clampMaxNumElementsStrict(0, S16, 2)
.scalarize(0)
.lower();
} else {
@@ -2535,10 +2542,8 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
} else {
// For cases where the widened type isn't a nice register value, unmerge
// from a widened register (e.g. <3 x s16> -> <4 x s16>)
B.setInsertPt(B.getMBB(), ++B.getInsertPt());
WideLoad = Helper.widenWithUnmerge(WideTy, ValReg);
B.setInsertPt(B.getMBB(), MI.getIterator());
B.buildLoadFromOffset(WideLoad, PtrReg, *MMO, 0);
WideLoad = B.buildLoadFromOffset(WideTy, PtrReg, *MMO, 0).getReg(0);
B.buildDeleteTrailingVectorElements(ValReg, WideLoad);
}
}

@@ -3811,6 +3816,10 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
llvm_unreachable("invalid data type");
}

if (StoreVT == LLT::fixed_vector(3, S16)) {
Reg = B.buildPadVectorWithUndefElements(LLT::fixed_vector(4, S16), Reg)
.getReg(0);
}
return Reg;
}

@@ -4653,9 +4662,23 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
// Deal with the one annoying legal case.
const LLT V3S16 = LLT::fixed_vector(3, 16);
if (Ty == V3S16) {
padWithUndef(ResTy, RegsToCover - ResultRegs.size() + 1);
auto Concat = B.buildConcatVectors(LLT::fixed_vector(6, 16), ResultRegs);
B.buildUnmerge({DstReg, MRI->createGenericVirtualRegister(V3S16)}, Concat);
if (IsTFE) {
if (ResultRegs.size() == 1) {
NewResultReg = ResultRegs[0];
} else if (ResultRegs.size() == 2) {
LLT V4S16 = LLT::fixed_vector(4, 16);
NewResultReg = B.buildConcatVectors(V4S16, ResultRegs).getReg(0);
} else {
return false;
}
}

if (MRI->getType(DstReg).getNumElements() <
MRI->getType(NewResultReg).getNumElements()) {
B.buildDeleteTrailingVectorElements(DstReg, NewResultReg);
} else {
B.buildPadVectorWithUndefElements(DstReg, NewResultReg);
}
return true;
}

21 changes: 14 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1162,18 +1162,25 @@ bool AMDGPURegisterBankInfo::applyMappingLoad(MachineInstr &MI,
// 96-bit loads are only available for vector loads. We need to split this
// into a 64-bit part, and 32 (unless we can widen to a 128-bit load).
if (MMO->getAlign() < Align(16)) {
MachineFunction *MF = MI.getParent()->getParent();
ApplyRegBankMapping ApplyBank(*this, MRI, DstBank);
MachineIRBuilder B(MI, ApplyBank);
LegalizerHelper Helper(*MF, ApplyBank, B);
LLT Part64, Part32;
std::tie(Part64, Part32) = splitUnequalType(LoadTy, 64);
auto Load0 = B.buildLoadFromOffset(Part64, PtrReg, *MMO, 0);
auto Load1 = B.buildLoadFromOffset(Part32, PtrReg, *MMO, 8);

auto Undef = B.buildUndef(LoadTy);
auto Ins0 = B.buildInsert(LoadTy, Undef, Load0, 0);
B.buildInsert(MI.getOperand(0), Ins0, Load1, 64);
if (Helper.reduceLoadStoreWidth(cast<GAnyLoad>(MI), 0, Part64) !=
LegalizerHelper::Legalized)
return false;
return true;
} else {
LLT WiderTy = widen96To128(LoadTy);
auto WideLoad = B.buildLoadFromOffset(WiderTy, PtrReg, *MMO, 0);
B.buildExtract(MI.getOperand(0), WideLoad, 0);
if (WiderTy.isScalar())
B.buildTrunc(MI.getOperand(0), WideLoad);
else {
B.buildDeleteTrailingVectorElements(MI.getOperand(0).getReg(),
WideLoad);
}
}
}

8 changes: 0 additions & 8 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -12,14 +12,6 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--"

; BIG-ENDIAN: unable to translate in big endian mode
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(<28 x s32>) = G_CONCAT_VECTORS %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>), %{{[0-9]+}}:_(<4 x s32>) (in function: odd_vector)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for odd_vector
; FALLBACK-WITH-REPORT-OUT-LABEL: odd_vector:
define void @odd_vector(<7 x i32>* %addr) {
%vec = load <7 x i32>, <7 x i32>* %addr
store <7 x i32> %vec, <7 x i32>* %addr
ret void
}

; Make sure we don't mess up metadata arguments.
declare void @llvm.write_register.i64(metadata, i64)
68 changes: 35 additions & 33 deletions llvm/test/CodeGen/AArch64/GlobalISel/call-lowering-vectors.ll
@@ -4,29 +4,31 @@
define i8 @v1s8_add(<1 x i8> %a0) {
; CHECK-LABEL: name: v1s8_add
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<8 x s8>)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
; CHECK: $w0 = COPY [[ANYEXT]](s32)
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<8 x s8>)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%res = bitcast <1 x i8> %a0 to i8
ret i8 %res
}

define i24 @test_v3i8(<3 x i8> %a) {
; CHECK-LABEL: name: test_v3i8
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $w0, $w1, $w2
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
; CHECK: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s24)
; CHECK: $w0 = COPY [[ANYEXT]](s32)
; CHECK: RET_ReallyLR implicit $w0
; CHECK-NEXT: liveins: $w0, $w1, $w2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s24) = G_BITCAST [[TRUNC]](<3 x s8>)
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s24)
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%res = bitcast <3 x i8> %a to i24
ret i24 %res
}
@@ -35,12 +37,13 @@ define i24 @test_v3i8(<3 x i8> %a) {
define <1 x half> @test_v1s16(<1 x float> %x) {
; CHECK-LABEL: name: test_v1s16
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $d0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV]](s32)
; CHECK: $h0 = COPY [[FPTRUNC]](s16)
; CHECK: RET_ReallyLR implicit $h0
; CHECK-NEXT: liveins: $d0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[UV]](s32)
; CHECK-NEXT: $h0 = COPY [[FPTRUNC]](s16)
; CHECK-NEXT: RET_ReallyLR implicit $h0
%tmp = fptrunc <1 x float> %x to <1 x half>
ret <1 x half> %tmp
}
@@ -49,17 +52,16 @@ declare <3 x float> @bar(float)
define void @test_return_v3f32() {
; CHECK-LABEL: name: test_return_v3f32
; CHECK: bb.1 (%ir-block.0):
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK: $s0 = COPY [[DEF]](s32)
; CHECK: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s0, implicit-def $q0
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<12 x s32>) = G_CONCAT_VECTORS [[BITCAST]](<4 x s32>), [[DEF1]](<4 x s32>), [[DEF1]](<4 x s32>)
; CHECK: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>), [[UV2:%[0-9]+]]:_(<3 x s32>), [[UV3:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<12 x s32>)
; CHECK: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK: RET_ReallyLR
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: $s0 = COPY [[DEF]](s32)
; CHECK-NEXT: BL @bar, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $s0, implicit-def $q0
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<4 x s32>)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
; CHECK-NEXT: RET_ReallyLR
%call = call <3 x float> @bar(float undef)
ret void
}
258 changes: 132 additions & 126 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-saddsat.mir

Large diffs are not rendered by default.

211 changes: 111 additions & 100 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir

Large diffs are not rendered by default.

258 changes: 132 additions & 126 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-ssubsat.mir

Large diffs are not rendered by default.

@@ -326,8 +326,13 @@
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C1]](s64)
; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[BUILD_VECTOR]](<2 x s64>), 32
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[EXTRACT]](s64)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[BUILD_VECTOR]](<2 x s64>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BITCAST]](s128)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C2]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[C2]](s32)
; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]]
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[OR]](s64)
%0:_(s64) = G_CONSTANT i64 0
%1:_(s64) = G_CONSTANT i64 1
%2:_(<2 x s64>) = G_BUILD_VECTOR %0, %1
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s
# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck %s

---
name: test_unmerge_values_s1_trunc_v2s1_of_build_vector_v2s32
16 changes: 6 additions & 10 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-zext.mir
@@ -74,17 +74,13 @@
; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32)
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[DEF]](s32)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s32>) = G_CONCAT_VECTORS [[AND]](<2 x s32>), [[AND1]](<2 x s32>), [[DEF1]](<2 x s32>)
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(<3 x s32>), [[UV4:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s32>)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV3]](<3 x s32>)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV2]], [[C]]
; CHECK-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AND]](<2 x s32>)
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[UV3]](s32), [[UV4]](s32), [[AND1]](s32)
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>)
%0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<3 x s16>) = G_TRUNC %0
%2:_(<3 x s32>) = G_ZEXT %1
@@ -30,12 +30,14 @@
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
; GFX10-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
; GFX10-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
; GFX10-NEXT: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
; GFX10-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
%0:_(p5) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
@@ -62,12 +64,14 @@
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[MV]](p4) :: (load (<4 x s32>), align 4, addrspace 4)
; GFX10-NEXT: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
; GFX10-NEXT: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
; GFX10-NEXT: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32)
; GFX10-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>)
; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV6]](s32)
; GFX10-NEXT: G_STORE [[COPY4]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
; GFX10-NEXT: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
; GFX10-NEXT: G_STORE [[COPY3]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
%0:_(p5) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
176 changes: 96 additions & 80 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
@@ -628,23 +628,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v6
; GFX9-NEXT: v_lshlrev_b32_e32 v6, 16, v7
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-NEXT: v_and_or_b32 v2, v2, v9, v6
; GFX9-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-NEXT: v_and_or_b32 v3, v3, v9, s4
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v8
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX9-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX9-NEXT: v_and_or_b32 v2, v4, v9, v2
; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX9-NEXT: v_and_or_b32 v3, v5, v9, s4
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v8
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_and_or_b32 v3, v4, v9, v3
; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v2
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0
; GFX9-NEXT: v_and_or_b32 v4, v5, v9, s4
; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v4
; GFX9-NEXT: v_pk_add_f16 v1, v3, v1
; GFX9-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-NEXT: v_pk_add_f16 v0, v2, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX9-NEXT: v_pk_add_f16 v1, v4, v1
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT: v_and_or_b32 v0, v0, v9, v2
; GFX9-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
@@ -657,18 +659,20 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v6
; GFX9-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v7
; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-CONTRACT-NEXT: v_and_or_b32 v2, v2, v9, v6
; GFX9-CONTRACT-NEXT: v_and_or_b32 v3, v3, v9, s4
; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v8
; GFX9-CONTRACT-NEXT: v_and_or_b32 v5, v5, v9, s4
; GFX9-CONTRACT-NEXT: v_and_or_b32 v4, v4, v9, v6
; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX9-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-CONTRACT-NEXT: v_and_or_b32 v3, v3, v9, s4
; GFX9-CONTRACT-NEXT: v_and_or_b32 v5, v5, v9, s4
; GFX9-CONTRACT-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX9-CONTRACT-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v2
; GFX9-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-DENORM-LABEL: test_3xhalf_add_mul_rhs:
@@ -680,23 +684,25 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v6
; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v7
; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-DENORM-NEXT: v_and_or_b32 v2, v2, v9, v6
; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-DENORM-NEXT: v_and_or_b32 v3, v3, v9, s4
; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v8
; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX9-DENORM-NEXT: v_and_or_b32 v2, v4, v9, v2
; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v4, 16, v0
; GFX9-DENORM-NEXT: v_and_or_b32 v3, v5, v9, s4
; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v8
; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-DENORM-NEXT: v_and_or_b32 v3, v4, v9, v3
; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v2
; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v3, v0
; GFX9-DENORM-NEXT: v_and_or_b32 v4, v5, v9, s4
; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v4
; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v3, v1
; GFX9-DENORM-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v2, v0
; GFX9-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v4, v1
; GFX9-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-DENORM-NEXT: v_and_or_b32 v0, v0, v9, v2
; GFX9-DENORM-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
@@ -709,18 +715,20 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX9-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v6
; GFX9-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v7
; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-UNSAFE-NEXT: v_and_or_b32 v2, v2, v9, v6
; GFX9-UNSAFE-NEXT: v_and_or_b32 v3, v3, v9, s4
; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v8
; GFX9-UNSAFE-NEXT: v_and_or_b32 v5, v5, v9, s4
; GFX9-UNSAFE-NEXT: v_and_or_b32 v4, v4, v9, v6
; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX9-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16
; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-UNSAFE-NEXT: v_and_or_b32 v3, v3, v9, s4
; GFX9-UNSAFE-NEXT: v_and_or_b32 v5, v5, v9, s4
; GFX9-UNSAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v0
; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX9-UNSAFE-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX9-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v2
; GFX9-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_3xhalf_add_mul_rhs:
@@ -734,47 +742,51 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4
; GFX10-NEXT: v_and_or_b32 v3, v3, v8, s4
; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v6
; GFX10-NEXT: v_and_or_b32 v2, v2, v8, v7
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-NEXT: v_and_or_b32 v2, v3, v8, s4
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v4
; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v4
; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4
; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX10-NEXT: v_pk_mul_f16 v1, v1, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX10-NEXT: v_and_or_b32 v2, v4, v8, v2
; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v6
; GFX10-NEXT: v_pk_add_f16 v0, v2, v0
; GFX10-NEXT: v_and_or_b32 v2, v5, v8, s4
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v6
; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4
; GFX10-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-NEXT: v_and_or_b32 v3, v4, v8, v3
; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v5
; GFX10-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-NEXT: v_pk_add_f16 v1, v2, v1
; GFX10-NEXT: v_pk_add_f16 v0, v3, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX10-NEXT: v_and_or_b32 v1, v1, v8, s4
; GFX10-NEXT: v_and_or_b32 v0, v0, v8, v3
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs:
; GFX10-CONTRACT: ; %bb.0: ; %.entry
; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-CONTRACT-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v7, 0xffff
; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v9, 16, v4
; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v9, 0xffff
; GFX10-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v7, s4
; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v8, 16, v8
; GFX10-CONTRACT-NEXT: v_and_or_b32 v3, v3, v7, s4
; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; GFX10-CONTRACT-NEXT: v_and_or_b32 v5, v5, v7, s4
; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v7, v6
; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v2, v7, v8
; GFX10-CONTRACT-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v4, v7, v9
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v6
; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v2, v9, v7
; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v4, v9, v8
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v7, s4
; GFX10-CONTRACT-NEXT: v_and_or_b32 v2, v3, v9, s4
; GFX10-CONTRACT-NEXT: v_and_or_b32 v4, v5, v9, s4
; GFX10-CONTRACT-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v2, v4
; GFX10-CONTRACT-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX10-CONTRACT-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX10-CONTRACT-NEXT: v_and_or_b32 v0, v0, v9, v3
; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs:
@@ -788,47 +800,51 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4
; GFX10-DENORM-NEXT: v_and_or_b32 v3, v3, v8, s4
; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v6
; GFX10-DENORM-NEXT: v_and_or_b32 v2, v2, v8, v7
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
; GFX10-DENORM-NEXT: v_and_or_b32 v2, v3, v8, s4
; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v4
; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v2, 16, v4
; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4
; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v2
; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX10-DENORM-NEXT: v_and_or_b32 v2, v4, v8, v2
; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v6
; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v2, v0
; GFX10-DENORM-NEXT: v_and_or_b32 v2, v5, v8, s4
; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v5, 16, v6
; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4
; GFX10-DENORM-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-DENORM-NEXT: v_and_or_b32 v3, v4, v8, v3
; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v5
; GFX10-DENORM-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v2, v1
; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v3, v0
; GFX10-DENORM-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX10-DENORM-NEXT: v_and_or_b32 v1, v1, v8, s4
; GFX10-DENORM-NEXT: v_and_or_b32 v0, v0, v8, v3
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-UNSAFE-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v7, 0xffff
; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v9, 16, v4
; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v9, 0xffff
; GFX10-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v6, 16, v6
; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v7, s4
; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v7, 16, v7
; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v8, 16, v8
; GFX10-UNSAFE-NEXT: v_and_or_b32 v3, v3, v7, s4
; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v9, 16, v9
; GFX10-UNSAFE-NEXT: v_and_or_b32 v5, v5, v7, s4
; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v7, v6
; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v2, v7, v8
; GFX10-UNSAFE-NEXT: s_lshl_b32 s4, s4, 16
; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v4, v7, v9
; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v6
; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v2, v9, v7
; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v4, v9, v8
; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v7, s4
; GFX10-UNSAFE-NEXT: v_and_or_b32 v2, v3, v9, s4
; GFX10-UNSAFE-NEXT: v_and_or_b32 v4, v5, v9, s4
; GFX10-UNSAFE-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v2, v4
; GFX10-UNSAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX10-UNSAFE-NEXT: v_and_or_b32 v1, v1, v9, s4
; GFX10-UNSAFE-NEXT: v_and_or_b32 v0, v0, v9, v3
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
%a = fmul <3 x half> %x, %y
2,029 changes: 1,049 additions & 980 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll
@@ -14,9 +14,9 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in,
; GCN-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:48
; GCN-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:64
; GCN-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:80
; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128
; GCN-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:96
; GCN-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:112
; GCN-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128
; GCN-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144
; GCN-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:160
; GCN-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:176
@@ -38,8 +38,6 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in,
; GCN-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:208
; GCN-NEXT: s_waitcnt vmcnt(7)
; GCN-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224
; GCN-NEXT: s_waitcnt vmcnt(7)
; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240
; GCN-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3]
; GCN-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:16
; GCN-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:32
@@ -48,6 +46,8 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in,
; GCN-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:80
; GCN-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:96
; GCN-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:112
; GCN-NEXT: s_waitcnt vmcnt(15)
; GCN-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240
; GCN-NEXT: s_endpgm
;
; GFX10-LABEL: v_insert_v64i32_37:
@@ -58,13 +58,13 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in,
; GFX10-NEXT: s_clause 0xf
; GFX10-NEXT: global_load_dwordx4 v[32:35], v64, s[0:1]
; GFX10-NEXT: global_load_dwordx4 v[36:39], v64, s[0:1] offset:16
; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128
; GFX10-NEXT: global_load_dwordx4 v[40:43], v64, s[0:1] offset:32
; GFX10-NEXT: global_load_dwordx4 v[44:47], v64, s[0:1] offset:48
; GFX10-NEXT: global_load_dwordx4 v[48:51], v64, s[0:1] offset:64
; GFX10-NEXT: global_load_dwordx4 v[52:55], v64, s[0:1] offset:80
; GFX10-NEXT: global_load_dwordx4 v[56:59], v64, s[0:1] offset:96
; GFX10-NEXT: global_load_dwordx4 v[60:63], v64, s[0:1] offset:112
; GFX10-NEXT: global_load_dwordx4 v[0:3], v64, s[0:1] offset:128
; GFX10-NEXT: global_load_dwordx4 v[4:7], v64, s[0:1] offset:144
; GFX10-NEXT: global_load_dwordx4 v[8:11], v64, s[0:1] offset:160
; GFX10-NEXT: global_load_dwordx4 v[12:15], v64, s[0:1] offset:176
@@ -84,8 +84,6 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in,
; GFX10-NEXT: global_store_dwordx4 v64, v[16:19], s[2:3] offset:192
; GFX10-NEXT: s_waitcnt vmcnt(2)
; GFX10-NEXT: global_store_dwordx4 v64, v[20:23], s[2:3] offset:208
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224
; GFX10-NEXT: global_store_dwordx4 v64, v[32:35], s[2:3]
; GFX10-NEXT: global_store_dwordx4 v64, v[36:39], s[2:3] offset:16
; GFX10-NEXT: global_store_dwordx4 v64, v[40:43], s[2:3] offset:32
Expand All @@ -94,6 +92,8 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in,
; GFX10-NEXT: global_store_dwordx4 v64, v[52:55], s[2:3] offset:80
; GFX10-NEXT: global_store_dwordx4 v64, v[56:59], s[2:3] offset:96
; GFX10-NEXT: global_store_dwordx4 v64, v[60:63], s[2:3] offset:112
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: global_store_dwordx4 v64, v[24:27], s[2:3] offset:224
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v64, v[28:31], s[2:3] offset:240
; GFX10-NEXT: s_endpgm
153 changes: 0 additions & 153 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir
@@ -198,156 +198,3 @@ body: |
S_ENDPGM 0, implicit %1, implicit %2
...

---
name: extract_sgpr_s32_from_v3s32
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2
; CHECK-LABEL: name: extract_sgpr_s32_from_v3s32
; CHECK: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub2
; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]]
%0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2
%1:sgpr(s32) = G_EXTRACT %0, 0
%2:sgpr(s32) = G_EXTRACT %0, 32
%3:sgpr(s32) = G_EXTRACT %0, 64
S_ENDPGM 0, implicit %0, implicit %2, implicit %3
...

---
name: extract_sgpr_v2s32_from_v3s32
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2
; CHECK-LABEL: name: extract_sgpr_v2s32_from_v3s32
; CHECK: [[COPY:%[0-9]+]]:sgpr_96_with_sub0_sub1 = COPY $sgpr0_sgpr1_sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY [[COPY]].sub0_sub1
; CHECK: S_ENDPGM 0, implicit [[COPY1]]
%0:sgpr(<3 x s32>) = COPY $sgpr0_sgpr1_sgpr2
%1:sgpr(<2 x s32>) = G_EXTRACT %0, 0
S_ENDPGM 0, implicit %1
...

---
name: extract_sgpr_v3s32_from_v4s32
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-LABEL: name: extract_sgpr_v3s32_from_v4s32
; CHECK: [[COPY:%[0-9]+]]:sgpr_128_with_sub1_sub2_sub3 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK: [[COPY1:%[0-9]+]]:sgpr_128_with_sub0_sub1_sub2 = COPY [[COPY]]
; CHECK: [[COPY2:%[0-9]+]]:sgpr_96 = COPY [[COPY1]].sub0_sub1_sub2
; CHECK: [[COPY3:%[0-9]+]]:sgpr_96 = COPY [[COPY]].sub1_sub2_sub3
; CHECK: S_ENDPGM 0, implicit [[COPY2]], implicit [[COPY3]]
%0:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
%1:sgpr(<3 x s32>) = G_EXTRACT %0, 0
%2:sgpr(<3 x s32>) = G_EXTRACT %0, 32
S_ENDPGM 0, implicit %1, implicit %2
...

---
name: extract_sgpr_v2s16_from_v4s16_offset0
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: extract_sgpr_v2s16_from_v4s16_offset0
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; CHECK: S_ENDPGM 0, implicit [[COPY1]]
%0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s16>) = G_EXTRACT %0, 0
S_ENDPGM 0, implicit %1
...

---
name: extract_sgpr_v2s16_from_v4s16_offset32
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: extract_sgpr_v2s16_from_v4s16_offset32
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK: S_ENDPGM 0, implicit [[COPY1]]
%0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s16>) = G_EXTRACT %0, 32
S_ENDPGM 0, implicit %1
...

# FIXME: Probably should not be legal
---
name: extract_sgpr_s16_from_v4s16_offset0
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset0
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
; CHECK: S_ENDPGM 0, implicit [[COPY1]]
%0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
%1:sgpr(s16) = G_EXTRACT %0, 0
S_ENDPGM 0, implicit %1
...

# FIXME: Probably should not be legal
---
name: extract_sgpr_s16_from_v4s16_offset32
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0_sgpr1
; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset32
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK: S_ENDPGM 0, implicit [[COPY1]]
%0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
%1:sgpr(s16) = G_EXTRACT %0, 32
S_ENDPGM 0, implicit %1
...

# FIXME: Probably should not be legal
---
name: extract_sgpr_s16_from_v6s16_offset32
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2
; CHECK-LABEL: name: extract_sgpr_s16_from_v6s16_offset32
; CHECK: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
; CHECK: S_ENDPGM 0, implicit [[COPY1]]
%0:sgpr(<6 x s16>) = COPY $sgpr0_sgpr1_sgpr2
%1:sgpr(s16) = G_EXTRACT %0, 32
S_ENDPGM 0, implicit %1
...
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fract.f64.mir
@@ -32,8 +32,7 @@
%7:sgpr(s64) = G_CONSTANT i64 36
%8:sgpr(p4) = G_PTR_ADD %2, %7(s64)
%9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
%10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0
%13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64
%10:sgpr(s64), %13:sgpr(s64) = G_UNMERGE_VALUES %9(<2 x s64>)
%15:sgpr(p1) = G_INTTOPTR %13(s64)
%18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1)
%19:sgpr(s64) = G_FCONSTANT double -0.000000e+00
@@ -82,8 +81,7 @@
%7:sgpr(s64) = G_CONSTANT i64 36
%8:sgpr(p4) = G_PTR_ADD %2, %7(s64)
%9:sgpr(<2 x s64>) = G_LOAD %8(p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4)
%10:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 0
%13:sgpr(s64) = G_EXTRACT %9(<2 x s64>), 64
%10:sgpr(s64), %13:sgpr(s64) = G_UNMERGE_VALUES %9(<2 x s64>)
%15:sgpr(p1) = G_INTTOPTR %13(s64)
%18:sgpr(s64) = G_LOAD %15(p1) :: (load (s64), addrspace 1)
%19:sgpr(s64) = G_FABS %18
39 changes: 0 additions & 39 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
@@ -566,42 +566,3 @@
%2:vgpr(s256) = G_INSERT %0, %1, 128
S_ENDPGM 0, implicit %2
...

---
name: insert_sgpr_v2s16_to_v4s16_offset0
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0_sgpr1, $sgpr2
; CHECK-LABEL: name: insert_sgpr_v2s16_to_v4s16_offset0
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0
; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
%0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s16>) = COPY $sgpr2
%2:sgpr(<4 x s16>) = G_INSERT %0, %1, 0
S_ENDPGM 0, implicit %2
...

---
name: insert_sgpr_v2s16_to_v4s16_offset32
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0_sgpr1, $sgpr2
; CHECK-LABEL: name: insert_sgpr_v2s16_to_v4s16_offset32
; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_64 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1
; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
%0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1
%1:sgpr(<2 x s16>) = COPY $sgpr2
%2:sgpr(<4 x s16>) = G_INSERT %0, %1, 32
S_ENDPGM 0, implicit %2
...
@@ -1,7 +1,7 @@
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s

# ERR: remark: <unknown>:0:0: cannot select: %3:sgpr(<4 x s16>) = G_INSERT %0:sgpr, %2:sgpr(s16), 0 (in function: insert_sgpr_s16_to_v4s16_offset0)
# ERR-NEXT: remark: <unknown>:0:0: cannot select: %2:sgpr(<16 x s32>) = G_INSERT %0:sgpr, %1:sgpr(<8 x s32>), 0 (in function: insert_sgpr_v8s32_to_v16s32_offset0)
# ERR: remark: <unknown>:0:0: instruction is not legal: %3:sgpr(<4 x s16>) = G_INSERT %0:sgpr, %2:sgpr(s16), 0 (in function: insert_sgpr_s16_to_v4s16_offset0)
# ERR-NEXT: <unknown>:0:0: instruction is not legal: %2:sgpr(<16 x s32>) = G_INSERT %0:sgpr, %1:sgpr(<8 x s32>), 0 (in function: insert_sgpr_v8s32_to_v16s32_offset0)
# ERR-NOT: remark

# FIXME: This 16-bit insert source should not be legal and this test
@@ -2078,11 +2078,11 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 {
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3i16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>)
; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call <3 x i16> @external_v3i16_func_void()
store volatile <3 x i16> %val, <3 x i16> addrspace(1)* undef
@@ -2252,11 +2252,11 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 {
; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v3f16_func_void, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[DEF1]](<2 x s16>)
; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; GCN-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
; GCN-NEXT: S_ENDPGM 0
%val = call <3 x half> @external_v3f16_func_void()
store volatile <3 x half> %val, <3 x half> addrspace(1)* undef
194 changes: 100 additions & 94 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll

Large diffs are not rendered by default.

40 changes: 20 additions & 20 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
@@ -1230,12 +1230,12 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]]
store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef
@@ -1267,12 +1267,12 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 {
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<5 x s16>), [[UV1:%[0-9]+]]:_(<5 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<10 x s16>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: G_STORE [[UV]](<5 x s16>), [[DEF1]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s16>), [[DEF]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY4]]
store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef
@@ -1363,12 +1363,12 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.1, align 16, addrspace 5)
; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, addrspace 5)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<130 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>), [[DEF]](<2 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<65 x s16>), [[UV1:%[0-9]+]]:_(<65 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<130 x s16>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<66 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>), [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[COPY10]](<2 x s16>), [[COPY11]](<2 x s16>), [[COPY12]](<2 x s16>), [[COPY13]](<2 x s16>), [[COPY14]](<2 x s16>), [[COPY15]](<2 x s16>), [[COPY16]](<2 x s16>), [[COPY17]](<2 x s16>), [[COPY18]](<2 x s16>), [[COPY19]](<2 x s16>), [[COPY20]](<2 x s16>), [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>), [[COPY23]](<2 x s16>), [[COPY24]](<2 x s16>), [[COPY25]](<2 x s16>), [[COPY26]](<2 x s16>), [[COPY27]](<2 x s16>), [[COPY28]](<2 x s16>), [[COPY29]](<2 x s16>), [[COPY30]](<2 x s16>), [[LOAD]](<2 x s16>), [[LOAD1]](<2 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16), [[UV65:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<66 x s16>)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<65 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16)
; CHECK-NEXT: [[COPY31:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: G_STORE [[UV]](<65 x s16>), [[DEF1]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<65 x s16>), [[DEF]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1)
; CHECK-NEXT: [[COPY32:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY31]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY32]]
store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef
@@ -1680,12 +1680,12 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: G_STORE [[UV]](<3 x s16>), [[DEF1]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
; CHECK-NEXT: S_SETPC_B64_return [[COPY3]]
store <3 x half> %arg0, <3 x half> addrspace(1)* undef
17 changes: 9 additions & 8 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll
@@ -1383,9 +1383,9 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1
; GCN-NEXT: [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
; GCN-NEXT: [[COPY8:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>), [[DEF]](<2 x s16>)
; GCN-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY8]](<2 x s16>), [[COPY9]](<2 x s16>)
; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>)
; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16)
; GCN-NEXT: [[COPY10:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @v3i16_fastcc_v3i16
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]]
@@ -1396,11 +1396,12 @@ define hidden fastcc <3 x i16> @sibling_call_v3i16_fastcc_v3i16(<3 x i16> %a) #1
; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY2]]
; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY1]]
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; GCN-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
; GCN-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UV]](<3 x s16>), [[DEF1]](<3 x s16>)
; GCN-NEXT: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
; GCN-NEXT: $vgpr0 = COPY [[UV2]](<2 x s16>)
; GCN-NEXT: $vgpr1 = COPY [[UV3]](<2 x s16>)
; GCN-NEXT: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<3 x s16>)
; GCN-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[DEF]](s16)
; GCN-NEXT: [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BUILD_VECTOR1]](<4 x s16>)
; GCN-NEXT: $vgpr0 = COPY [[UV7]](<2 x s16>)
; GCN-NEXT: $vgpr1 = COPY [[UV8]](<2 x s16>)
; GCN-NEXT: [[COPY19:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY19]](<4 x s32>)
; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4)
633 changes: 340 additions & 293 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-and.mir

Large diffs are not rendered by default.

5 changes: 1 addition & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
@@ -114,10 +114,7 @@ body: |
; CHECK-LABEL: name: test_anyext_v3s16_to_v3s32
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT]](<3 x s16>), 0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
104 changes: 51 additions & 53 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9PLUS %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9PLUS %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9PLUS %s

---
name: test_ashr_s32_s32
@@ -532,43 +532,40 @@ body: |
; SI-LABEL: name: test_ashr_v3s64_v3s32
; SI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; SI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
; SI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
; SI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
; SI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32)
; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32)
; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64)
; SI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
; SI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32)
; SI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32)
; SI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32)
; SI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
; SI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
; SI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
; SI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
; SI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; VI-LABEL: name: test_ashr_v3s64_v3s32
; VI: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; VI-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
; VI-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
; VI-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
; VI-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32)
; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32)
; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64)
; VI-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
; VI-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32)
; VI-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32)
; VI-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32)
; VI-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
; VI-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
; VI-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
; VI-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
; VI-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
; GFX9PLUS-LABEL: name: test_ashr_v3s64_v3s32
; GFX9PLUS: [[COPY:%[0-9]+]]:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
; GFX9PLUS-NEXT: [[EXTRACT:%[0-9]+]]:_(<3 x s64>) = G_EXTRACT [[COPY]](<4 x s64>), 0
; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s64>)
; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[EXTRACT]](<3 x s64>)
; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV3]](s32)
; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV4]](s32)
; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV5]](s32)
; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64)
; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<3 x s32>)
; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[UV4]](s32)
; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[UV5]](s32)
; GFX9PLUS-NEXT: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[UV6]](s32)
; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s64>) = G_IMPLICIT_DEF
; GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s64>) = G_INSERT [[DEF]], [[BUILD_VECTOR]](<3 x s64>), 0
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[INSERT]](<4 x s64>)
; GFX9PLUS-NEXT: [[UV7:%[0-9]+]]:_(s64), [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64), [[UV10:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[DEF]](<4 x s64>)
; GFX9PLUS-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64), [[ASHR2]](s64), [[UV10]](s64)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<4 x s64>)
%0:_(<4 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
%1:_(<3 x s64>) = G_EXTRACT %0, 0
%2:_(<3 x s32>) = COPY $vgpr8_vgpr9_vgpr10
@@ -798,32 +795,33 @@ body: |
; GFX9PLUS-LABEL: name: test_ashr_v3s16_v3s16
; GFX9PLUS: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
; GFX9PLUS-NEXT: [[COPY1:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
; GFX9PLUS-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9PLUS-NEXT: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
; GFX9PLUS-NEXT: [[EXTRACT:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT]](<4 x s16>), 0
; GFX9PLUS-NEXT: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV]](<3 x s16>), 0
; GFX9PLUS-NEXT: [[EXTRACT1:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT1]](<4 x s16>), 32
; GFX9PLUS-NEXT: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
; GFX9PLUS-NEXT: [[EXTRACT2:%[0-9]+]]:_(<2 x s16>) = G_EXTRACT [[INSERT2]](<4 x s16>), 0
; GFX9PLUS-NEXT: [[INSERT3:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[UV2]](<3 x s16>), 0
; GFX9PLUS-NEXT: [[EXTRACT3:%[0-9]+]]:_(s16) = G_EXTRACT [[INSERT3]](<4 x s16>), 32
; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[EXTRACT]], [[EXTRACT2]](<2 x s16>)
; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[EXTRACT1]], [[EXTRACT3]](s16)
; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>)
; GFX9PLUS-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
; GFX9PLUS-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
; GFX9PLUS-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9PLUS-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9PLUS-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9PLUS-NEXT: [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>)
; GFX9PLUS-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9PLUS-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32)
; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
; GFX9PLUS-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
; GFX9PLUS-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; GFX9PLUS-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9PLUS-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9PLUS-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32)
; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
; GFX9PLUS-NEXT: [[ASHR:%[0-9]+]]:_(<2 x s16>) = G_ASHR [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
; GFX9PLUS-NEXT: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[TRUNC]], [[TRUNC1]](s16)
; GFX9PLUS-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[ASHR]](<2 x s16>)
; GFX9PLUS-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; GFX9PLUS-NEXT: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9PLUS-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
; GFX9PLUS-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; GFX9PLUS-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32)
; GFX9PLUS-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV7]](<2 x s16>)
; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST4]](s32), [[LSHR2]](s32)
; GFX9PLUS-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[ASHR1]](s16)
; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST1]](s32)
; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST2]](s32)
; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>), [[BUILD_VECTOR_TRUNC2]](<2 x s16>)
; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ANYEXT]](s32), [[BITCAST5]](s32)
; GFX9PLUS-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR3]](s32), [[BITCAST6]](s32)
; GFX9PLUS-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9PLUS-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
%1:_(<6 x s16>) = COPY $vgpr3_vgpr4_vgpr5
3,449 changes: 1,796 additions & 1,653 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir

Large diffs are not rendered by default.

@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer %s -o - | FileCheck %s

---
name: concat_vectors_v2s32_v2s32
314 changes: 61 additions & 253 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir

Large diffs are not rendered by default.

218 changes: 102 additions & 116 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract.mir

Large diffs are not rendered by default.

55 changes: 20 additions & 35 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fabs.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s


---
@@ -231,25 +231,19 @@ body: |
; SI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
; SI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; SI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
; SI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
; SI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; SI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
; SI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
; SI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; SI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; SI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
; SI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
; SI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; SI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
; SI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; SI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
; SI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
; SI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
; SI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; SI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
; SI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
; SI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
; SI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
; SI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
; VI-LABEL: name: test_fabs_v3s16
; VI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -274,25 +268,19 @@ body: |
; VI-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
; VI-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; VI-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
; VI-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
; VI-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[BITCAST4]], [[C1]]
; VI-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
; VI-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C]](s32)
; VI-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND3]], [[SHL2]]
; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; VI-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
; VI-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[BITCAST5]], [[C1]]
; VI-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[BITCAST6]], [[C1]]
; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C]](s32)
; VI-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32)
; VI-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[AND5]], [[SHL3]]
; VI-NEXT: [[BITCAST9:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; VI-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[LSHR2]], [[C1]]
; VI-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[BITCAST7]], [[C1]]
; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C]](s32)
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[SHL4]]
; VI-NEXT: [[BITCAST10:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST8]](<2 x s16>), [[BITCAST9]](<2 x s16>), [[BITCAST10]](<2 x s16>)
; VI-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR3]](s32)
; VI-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32)
; VI-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL4]]
; VI-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR4]](s32)
; VI-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BITCAST6]](<2 x s16>), [[BITCAST7]](<2 x s16>), [[BITCAST8]](<2 x s16>)
; VI-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
; GFX9-LABEL: name: test_fabs_v3s16
; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
@@ -309,12 +297,9 @@ body: |
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FABS]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FABS1]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST4]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR2]](s32), [[BITCAST5]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[BITCAST2]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[LSHR1]](s32), [[BITCAST3]](s32)
; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC2]](<2 x s16>), [[BUILD_VECTOR_TRUNC3]](<2 x s16>), [[BUILD_VECTOR_TRUNC4]](<2 x s16>)
; GFX9-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<6 x s16>)
%0:_(<3 x s16>) = G_IMPLICIT_DEF
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fadd.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s

---
name: test_fadd_s32
@@ -446,21 +446,21 @@ body: |
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
; GFX9-NEXT: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<6 x s16>)
; GFX9-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>)
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
; GFX9-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST]](s32), [[LSHR]](s32)
; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST1]](s32), [[DEF]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST2]](s32), [[LSHR1]](s32)
; GFX9-NEXT: [[BUILD_VECTOR_TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[BITCAST3]](s32), [[DEF]](s32)
; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC2]]
; GFX9-NEXT: [[FADD1:%[0-9]+]]:_(<2 x s16>) = G_FADD [[BUILD_VECTOR_TRUNC1]], [[BUILD_VECTOR_TRUNC3]]
; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[FADD]](<2 x s16>)
; GFX9-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32)
; GFX9-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[FADD1]](<2 x s16>)
; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF1]](<4 x s16>)
; GFX9-NEXT: [[BITCAST6:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>)
; GFX9-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST6]], [[C]](s32)
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=SI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=VI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=SI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=VI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s


---
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcos.mir
@@ -1,8 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=SI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=VI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -o - %s | FileCheck -check-prefix=SI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -o - %s | FileCheck -check-prefix=VI %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s

---
name: test_fcos_s32