[GlobalISel] Handle non-multiples of the base type in narrowScalarInsert
When narrowing G_INSERT, handle types that aren't a multiple of the
type we're narrowing to. This comes up when narrowing something like
an s96 to fit in 64-bit registers, and also for non-byte-multiple
packed types, should they arise.

These cases are handled by extending the leftover bits to the narrow
size, performing the insert at that width, and then truncating the
result back to the destination size.

Differential Revision: https://reviews.llvm.org/D97791
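
As a rough illustration of the approach (a simplified sketch, not the literal legalizer output; register names and the extract sequence are adapted from the new legalize-inserts.mir checks below), narrowing an s96 G_INSERT to s64 proceeds like this: the full-width low part is kept as-is, the leftover high 32 bits are any-extended to s64 so the insert can happen at the narrow width, and the merged result is truncated back down.

  ; Sketch: narrowing  %dst:_(s96) = G_INSERT %src, %val(s32), 64  with NarrowTy = s64
  %lo:_(s64) = G_EXTRACT %src(s96), 0          ; full-width part, untouched by the insert
  %hi:_(s32) = G_EXTRACT %src(s96), 64         ; leftover part, smaller than NarrowTy
  %hi64:_(s64) = G_ANYEXT %hi(s32)             ; extend the leftover to the narrow type
  %ins:_(s64) = G_INSERT %hi64, %val(s32), 0   ; do the insert at the narrow width
  %wide:_(s128) = G_MERGE_VALUES %lo(s64), %ins(s64)
  %dst:_(s96) = G_TRUNC %wide(s128)            ; truncate back to the destination size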
bogner committed Jun 8, 2021
1 parent 30bb5dc commit 2a7e759
Showing 3 changed files with 98 additions and 55 deletions.
57 changes: 33 additions & 24 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5100,37 +5100,43 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
   if (TypeIdx != 0)
     return UnableToLegalize;

-  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
-  uint64_t NarrowSize = NarrowTy.getSizeInBits();
-
-  // FIXME: add support for when SizeOp0 isn't an exact multiple of
-  // NarrowSize.
-  if (SizeOp0 % NarrowSize != 0)
-    return UnableToLegalize;
-
-  int NumParts = SizeOp0 / NarrowSize;
-
-  SmallVector<Register, 2> SrcRegs, DstRegs;
+  SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
   SmallVector<uint64_t, 2> Indexes;
-  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+  LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
+  LLT LeftoverTy;
+  extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
+               LeftoverRegs);
+
+  for (Register Reg : LeftoverRegs)
+    SrcRegs.push_back(Reg);

+  uint64_t NarrowSize = NarrowTy.getSizeInBits();
   Register OpReg = MI.getOperand(2).getReg();
   uint64_t OpStart = MI.getOperand(3).getImm();
   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
-  for (int i = 0; i < NumParts; ++i) {
-    unsigned DstStart = i * NarrowSize;
+  for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
+    unsigned DstStart = I * NarrowSize;

-    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
-      // No part of the insert affects this subregister, forward the original.
-      DstRegs.push_back(SrcRegs[i]);
-      continue;
-    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+    if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
       // The entire subregister is defined by this insert, forward the new
       // value.
       DstRegs.push_back(OpReg);
       continue;
     }

+    Register SrcReg = SrcRegs[I];
+    if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
+      // The leftover reg is smaller than NarrowTy, so we need to extend it.
+      SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
+      MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
+    }
+
+    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
+      // No part of the insert affects this subregister, forward the original.
+      DstRegs.push_back(SrcReg);
+      continue;
+    }
+
     // OpSegStart is where this destination segment would start in OpReg if it
     // extended infinitely in both directions.
     int64_t ExtractOffset, InsertOffset;
@@ -5154,16 +5160,19 @@ LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
     }

     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
-    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
+    MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
     DstRegs.push_back(DstReg);
   }

-  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
+  uint64_t WideSize = DstRegs.size() * NarrowSize;
   Register DstReg = MI.getOperand(0).getReg();
-  if(MRI.getType(DstReg).isVector())
-    MIRBuilder.buildBuildVector(DstReg, DstRegs);
-  else
+  if (WideSize > RegTy.getSizeInBits()) {
+    Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
+    MIRBuilder.buildMerge(MergeReg, DstRegs);
+    MIRBuilder.buildTrunc(DstReg, MergeReg);
+  } else
     MIRBuilder.buildMerge(DstReg, DstRegs);
+
   MI.eraseFromParent();
   return Legalized;
 }
22 changes: 0 additions & 22 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -91,28 +91,6 @@ define void @nonpow2_add_narrowing(i128 %x, i128 %y) {
   ret void
 }

-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %{{[0-9]+}}:_(s96) = G_INSERT %{{[0-9]+}}:_, %{{[0-9]+}}:_(s32), 64 (in function: nonpow2_or_narrowing)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_or_narrowing
-; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_or_narrowing:
-define void @nonpow2_or_narrowing() {
-  %a = add i128 undef, undef
-  %b = trunc i128 %a to i96
-  %a2 = add i128 undef, undef
-  %b2 = trunc i128 %a2 to i96
-  %dummy = or i96 %b, %b2
-  store i96 %dummy, i96* undef
-  ret void
-}
-
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to legalize instruction: %0:_(s96) = G_INSERT %10:_, %8:_(s32), 64 (in function: nonpow2_load_narrowing)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_load_narrowing
-; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_load_narrowing:
-define void @nonpow2_load_narrowing() {
-  %dummy = load i96, i96* undef
-  store i96 %dummy, i96* undef
-  ret void
-}
-
 ; Currently can't handle vector lengths that aren't an exact multiple of
 ; natively supported vector lengths. Test that the fall-back works for those.
 ; FALLBACK-WITH-REPORT-ERR-G_IMPLICIT_DEF-LEGALIZABLE: (FIXME: this is what is expected once we can legalize non-pow-of-2 G_IMPLICIT_DEF) remark: <unknown>:0:0: unable to legalize instruction: %1:_(<7 x s64>) = G_ADD %0, %0 (in function: nonpow2_vector_add_fewerelements
74 changes: 65 additions & 9 deletions llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
@@ -1,11 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -O0 -run-pass=legalizer %s -o - | FileCheck %s
-
---- |
-  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-  target triple = "aarch64--"
-  define void @test_inserts_nonpow2() { ret void }
-...
+# RUN: llc -O0 -mtriple=aarch64-- -run-pass=legalizer %s -o - | FileCheck %s

 ---
 name: test_inserts_nonpow2
@@ -15,8 +9,12 @@ body: |
     ; CHECK-LABEL: name: test_inserts_nonpow2
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = COPY $x3
-    ; CHECK: $x0 = COPY [[C]]
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK: $x0 = COPY [[COPY3]](s64)
+    ; CHECK: RET_ReallyLR
     %0:_(s64) = COPY $x0
     %1:_(s64) = COPY $x1
     %2:_(s64) = COPY $x2
@@ -27,3 +25,61 @@ body: |
     $x0 = COPY %6
     RET_ReallyLR
 ...
+---
+name: test_inserts_s96
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: test_inserts_s96
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s32)
+    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT]], [[TRUNC]](s32), 0
+    ; CHECK: $x0 = COPY [[COPY3]](s64)
+    ; CHECK: $x1 = COPY [[INSERT]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s128) = G_MERGE_VALUES %0:_(s64), %1:_(s64)
+    %4:_(s96) = G_TRUNC %3(s128)
+    %5:_(s32) = G_TRUNC %2(s64)
+    %6:_(s96) = G_INSERT %4, %5(s32), 64
+    %7:_(s128) = G_ANYEXT %6(s96)
+    %8:_(s64), %9:_(s64) = G_UNMERGE_VALUES %7
+    $x0 = COPY %8
+    $x1 = COPY %9
+...
+---
+name: test_inserts_s65
+body: |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: test_inserts_s65
+    ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s64)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
+    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s1) = G_EXTRACT [[COPY1]](s64), 0
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s1)
+    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT]], [[TRUNC]](s1), 0
+    ; CHECK: $x0 = COPY [[COPY3]](s64)
+    ; CHECK: $x1 = COPY [[INSERT]](s64)
+    %0:_(s64) = COPY $x0
+    %1:_(s64) = COPY $x1
+    %2:_(s64) = COPY $x2
+    %3:_(s128) = G_MERGE_VALUES %0:_(s64), %1:_(s64)
+    %4:_(s65) = G_TRUNC %3(s128)
+    %5:_(s1) = G_TRUNC %2(s64)
+    %6:_(s65) = G_INSERT %4, %5(s1), 64
+    %7:_(s128) = G_ANYEXT %6(s65)
+    %8:_(s64), %9:_(s64) = G_UNMERGE_VALUES %7
+    $x0 = COPY %8
+    $x1 = COPY %9
+...
