GlobalISel: Do not change register types in lowerLoad
Adjusting the load register type is a widenScalar type action, not a
lowering. lowerLoad should be reserved for operations that change the
memory access size, such as unaligned load decomposition. With lowerLoad
also trying to adjust the register type, it was hard to avoid infinite
loops in the legalizer. This adds a band-aid to avoid regressing a few
AArch64 tests, but I'm not sure what the exact condition is and there's
probably a cleaner way to do this.

For AMDGPU this regresses handling of some unaligned load cases, but
the way this currently works is a pretty ugly hack.
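
As a rough illustration of the split this enforces, the register-type adjustment for extending loads is now expressed through ordinary legalization rules rather than inside lowerLoad. The following is a condensed sketch of the AArch64 rules added in this commit (the surrounding getActionDefinitionsBuilder setup is elided; s32 and the LegalityQuery fields are the ones that appear in the diff below):

    getActionDefinitionsBuilder(G_LOAD)
        // Narrow wide extending-load results to 32 bits. This only changes
        // the register type; the memory access size is left alone.
        .narrowScalarIf(
            [=](const LegalityQuery &Query) {
              return Query.Types[0].isScalar() &&
                     Query.Types[0].getSizeInBits() !=
                         Query.MMODescrs[0].SizeInBits &&
                     Query.Types[0].getSizeInBits() > 32;
            },
            changeTo(0, s32))
        // Any remaining any-extending load is split into a plain G_LOAD
        // followed by a G_ANYEXT of the result.
        .lowerIf([=](const LegalityQuery &Query) {
          return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
        })
        .widenScalarToNextPow2(0);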
arsenm committed May 27, 2021
1 parent 7922ff6 commit e892705
Showing 11 changed files with 110 additions and 353 deletions.
22 changes: 0 additions & 22 deletions llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -2810,28 +2810,6 @@ LegalizerHelper::lowerLoad(MachineInstr &MI) {
    return Legalized;
  }

  if (DstTy.isScalar()) {
    Register TmpReg =
        MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
    MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    case TargetOpcode::G_LOAD:
      MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
      break;
    case TargetOpcode::G_SEXTLOAD:
      MIRBuilder.buildSExt(DstReg, TmpReg);
      break;
    case TargetOpcode::G_ZEXTLOAD:
      MIRBuilder.buildZExt(DstReg, TmpReg);
      break;
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}

9 changes: 8 additions & 1 deletion llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -294,11 +294,18 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
      .legalForTypesWithMemDesc({{s32, p0, 8, 8}, {s32, p0, 16, 8}})
      .clampScalar(0, s8, s64)
      .lowerIfMemSizeNotPow2()
      .widenScalarToNextPow2(0)
      .narrowScalarIf([=](const LegalityQuery &Query) {
        // Clamp extending load results to 32-bits.
        return Query.Types[0].isScalar() &&
               Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits &&
               Query.Types[0].getSizeInBits() > 32;
      },
      changeTo(0, s32))
      // Lower any any-extending loads left into G_ANYEXT and G_LOAD
      .lowerIf([=](const LegalityQuery &Query) {
        return Query.Types[0].getSizeInBits() != Query.MMODescrs[0].SizeInBits;
      })
      .widenScalarToNextPow2(0)
      .clampMaxNumElements(0, s8, 16)
      .clampMaxNumElements(0, s16, 8)
      .clampMaxNumElements(0, s32, 4)
29 changes: 18 additions & 11 deletions llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -222,7 +222,9 @@ static LegalityPredicate elementTypeIsLegal(unsigned TypeIdx) {
};
}

static LegalityPredicate isWideScalarTruncStore(unsigned TypeIdx) {
// If we have a truncating store or an extending load with a data size larger
// than 32-bits, we need to reduce to a 32-bit type.
static LegalityPredicate isWideScalarExtLoadTruncStore(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT Ty = Query.Types[TypeIdx];
return !Ty.isVector() && Ty.getSizeInBits() > 32 &&
@@ -274,6 +276,10 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
if (AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
return false;

// Do not handle extending vector loads.
if (Ty.isVector() && MemSize != RegSize)
return false;

// TODO: We should be able to widen loads if the alignment is high enough, but
// we also need to modify the memory access size.
#if 0
@@ -1249,15 +1255,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return std::make_pair(0, EltTy);
})
.lowerIfMemSizeNotPow2()
.minScalar(0, S32);

if (IsStore)
Actions.narrowScalarIf(isWideScalarTruncStore(0), changeTo(0, S32));

Actions
.widenScalarToNextPow2(0)
.moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
.lower();
.minScalar(0, S32)
.narrowScalarIf(isWideScalarExtLoadTruncStore(0), changeTo(0, S32))
.widenScalarToNextPow2(0)
.moreElementsIf(vectorSmallerThan(0, 32), moreEltsToNext32Bit(0))
.lower();
}

auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
@@ -1268,7 +1270,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
{S32, PrivatePtr, 8, 8},
{S32, PrivatePtr, 16, 16},
{S32, ConstantPtr, 8, 8},
{S32, ConstantPtr, 16, 2 * 8}});
{S32, ConstantPtr, 16, 2 * 8}})
.legalIf(
[=](const LegalityQuery &Query) -> bool {
return isLoadStoreLegal(ST, Query);
});

if (ST.hasFlatAddressSpace()) {
ExtLoads.legalForTypesWithMemDesc(
{{S32, FlatPtr, 8, 8}, {S32, FlatPtr, 16, 16}});
42 changes: 18 additions & 24 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -842,18 +842,15 @@ body: |
; SI: $vgpr0 = COPY [[ANYEXT]](s32)
; CI-HSA-LABEL: name: test_load_global_s24_align1
; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
; CI-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; CI-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, addrspace 1)
; CI-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C2]](s32)
; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[AND]]
; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
; CI-HSA: $vgpr0 = COPY [[COPY2]](s32)
; CI-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, addrspace 1)
; CI-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
; CI-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
; CI-HSA: $vgpr0 = COPY [[COPY1]](s32)
; CI-MESA-LABEL: name: test_load_global_s24_align1
; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; CI-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
@@ -878,18 +875,15 @@ body: |
; VI: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-HSA-LABEL: name: test_load_global_s24_align1
; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
; GFX9-HSA: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; GFX9-HSA: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, addrspace 1)
; GFX9-HSA: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C2]](s32)
; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[AND]]
; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
; GFX9-HSA: $vgpr0 = COPY [[COPY2]](s32)
; GFX9-HSA: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1 from unknown-address + 2, addrspace 1)
; GFX9-HSA: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-HSA: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
; GFX9-HSA: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
; GFX9-HSA: $vgpr0 = COPY [[COPY1]](s32)
; GFX9-MESA-LABEL: name: test_load_global_s24_align1
; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p1) :: (load 2, align 1, addrspace 1)
21 changes: 9 additions & 12 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-local.mir
@@ -1149,18 +1149,15 @@ body: |
; GFX9: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-UNALIGNED-LABEL: name: test_load_local_s24_align1
; GFX9-UNALIGNED: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; GFX9-UNALIGNED: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C1]](s32)
; GFX9-UNALIGNED: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
; GFX9-UNALIGNED: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD1]], [[C2]](s32)
; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[AND]]
; GFX9-UNALIGNED: [[COPY2:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY2]](s32)
; GFX9-UNALIGNED: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
; GFX9-UNALIGNED: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
; GFX9-UNALIGNED: [[PTR_ADD:%[0-9]+]]:_(p3) = G_PTR_ADD [[COPY]], [[C]](s32)
; GFX9-UNALIGNED: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p3) :: (load 1 from unknown-address + 2, addrspace 3)
; GFX9-UNALIGNED: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; GFX9-UNALIGNED: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LOAD]], [[C1]](s32)
; GFX9-UNALIGNED: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL]], [[ZEXTLOAD]]
; GFX9-UNALIGNED: [[COPY1:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
; GFX9-UNALIGNED: $vgpr0 = COPY [[COPY1]](s32)
; GFX10-LABEL: name: test_load_local_s24_align1
; GFX10: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
; GFX10: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[COPY]](p3) :: (load 2, align 1, addrspace 3)
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=bonaire -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=CI %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -O0 -run-pass=legalizer %s -o - | FileCheck -check-prefix=CI %s
# FIXME: Run with and without unaligned access on

---
name: test_sextload_constant32bit_s64_s32_align4
@@ -29,19 +30,8 @@ body: |
; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 2, addrspace 6)
; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C1]](s64)
; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 2 from unknown-address + 2, addrspace 6)
; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[OR]](s32)
; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 4, align 2, addrspace 6)
; CI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
; CI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
%0:_(p6) = COPY $sgpr0
%1:_(s64) = G_SEXTLOAD %0 :: (load 4, align 2, addrspace 6)
@@ -58,35 +48,8 @@ body: |
; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 1, addrspace 6)
; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C1]](s64)
; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 6)
; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C2]](s64)
; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1 from unknown-address + 2, addrspace 6)
; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C3]](s64)
; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1 from unknown-address + 3, addrspace 6)
; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; CI: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
; CI: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C5]](s32)
; CI: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C4]]
; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C6]](s32)
; CI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
; CI: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
; CI: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CI: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C7]](s32)
; CI: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
; CI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[OR2]](s32)
; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 4, align 1, addrspace 6)
; CI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s32)
; CI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
%0:_(p6) = COPY $sgpr0
%1:_(s64) = G_SEXTLOAD %0 :: (load 4, align 1, addrspace 6)
@@ -137,22 +100,8 @@ body: |
; CI: [[COPY:%[0-9]+]]:_(p6) = COPY $sgpr0
; CI: [[C:%[0-9]+]]:_(p6) = G_CONSTANT i32 0
; CI: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](p6), [[C]](p6)
; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (load 1, addrspace 6)
; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[MV]], [[C1]](s64)
; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1 from unknown-address + 1, addrspace 6)
; CI: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 255
; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C2]]
; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C4]]
; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s32)
; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC1]]
; CI: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[OR]](s16)
; CI: $vgpr0 = COPY [[SEXT]](s32)
; CI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[MV]](p4) :: (load 2, align 1, addrspace 6)
; CI: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p6) = COPY $sgpr0
%1:_(s32) = G_SEXTLOAD %0 :: (load 2, align 1, addrspace 6)
$vgpr0 = COPY %1
32 changes: 11 additions & 21 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sextload-flat.mir
@@ -13,10 +13,8 @@ body: |
; SI: $vgpr0 = COPY [[SEXTLOAD]](s32)
; VI-LABEL: name: test_sextload_flat_i32_i8
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1)
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; VI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8
; VI: $vgpr0 = COPY [[SEXT_INREG]](s32)
; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1)
; VI: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 0)
$vgpr0 = COPY %1
@@ -33,10 +31,8 @@ body: |
; SI: $vgpr0 = COPY [[SEXTLOAD]](s32)
; VI-LABEL: name: test_sextload_flat_i32_i16
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2)
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; VI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16
; VI: $vgpr0 = COPY [[SEXT_INREG]](s32)
; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2)
; VI: $vgpr0 = COPY [[SEXTLOAD]](s32)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(s32) = G_SEXTLOAD %0 :: (load 2, addrspace 0)
$vgpr0 = COPY %1
@@ -54,11 +50,9 @@ body: |
; SI: $vgpr0 = COPY [[COPY1]](s32)
; VI-LABEL: name: test_sextload_flat_i31_i8
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1)
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; VI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8
; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
; VI: $vgpr0 = COPY [[COPY2]](s32)
; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1)
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SEXTLOAD]](s32)
; VI: $vgpr0 = COPY [[COPY1]](s32)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(s31) = G_SEXTLOAD %0 :: (load 1, addrspace 0)
%2:_(s32) = G_ANYEXT %1
@@ -77,10 +71,8 @@ body: |
; SI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
; VI-LABEL: name: test_sextload_flat_i64_i8
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1)
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; VI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 8
; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXT_INREG]](s32)
; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 1)
; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
; VI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(s64) = G_SEXTLOAD %0 :: (load 1, addrspace 0)
@@ -99,10 +91,8 @@ body: |
; SI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
; VI-LABEL: name: test_sextload_flat_i64_i16
; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1
; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 2)
; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
; VI: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY1]], 16
; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXT_INREG]](s32)
; VI: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p0) :: (load 2)
; VI: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SEXTLOAD]](s32)
; VI: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
%0:_(p0) = COPY $vgpr0_vgpr1
%1:_(s64) = G_SEXTLOAD %0 :: (load 2, addrspace 0)
