diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 99a7d893c931..ede775ffef04 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -646,6 +646,11 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, // Split vector extloads. unsigned MemSize = Query.MMODescrs[0].SizeInBits; + unsigned Align = Query.MMODescrs[0].AlignInBits; + + if (MemSize < DstTy.getSizeInBits()) + MemSize = std::max(MemSize, Align); + if (DstTy.isVector() && DstTy.getSizeInBits() > MemSize) return true; @@ -660,7 +665,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, if (NumRegs == 3 && !ST.hasDwordx3LoadStores()) return true; - unsigned Align = Query.MMODescrs[0].AlignInBits; if (Align < MemSize) { const SITargetLowering *TLI = ST.getTargetLowering(); return !TLI->allowsMisalignedMemoryAccessesImpl(MemSize, AS, Align / 8); @@ -802,14 +806,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, unsigned MemSize = Query.MMODescrs[0].SizeInBits; unsigned Align = Query.MMODescrs[0].AlignInBits; - // No extending vector loads. - if (Size > MemSize && Ty0.isVector()) - return false; - // FIXME: Widening store from alignment not valid. if (MemSize < Size) MemSize = std::max(MemSize, Align); + // No extending vector loads. + if (Size > MemSize && Ty0.isVector()) + return false; + switch (MemSize) { case 8: case 16: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir index 82cd5cc93821..d7ff5b22df2d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-constant.mir @@ -4105,111 +4105,36 @@ body: | ; CI-LABEL: name: test_load_constant_v2s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1, addrspace 4) - ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1, align 2, addrspace 4) - ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1, addrspace 4) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; CI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_constant_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1, addrspace 4) - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1, align 2, addrspace 4) - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1, addrspace 4) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_constant_v2s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1, addrspace 4) - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1, align 2, addrspace 4) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1, addrspace 4) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_constant_v2s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1, align 2, addrspace 4) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1, addrspace 4) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_constant_v2s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p4) :: (load 1, align 4, addrspace 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (load 1, align 2, addrspace 4) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (load 1, addrspace 4) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p4) :: (load 2, align 4, addrspace 4) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir index 91919a1302c0..98001e2f72b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-flat.mir @@ -4175,111 +4175,36 @@ body: | ; CI-LABEL: name: test_load_flat_v2s8_align4 ; CI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1) - ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1, align 2) - ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1) - ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; CI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; CI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; CI: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_flat_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1) - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1, align 2) - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_load_flat_v2s8_align4 ; GFX9: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1) - ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1, align 2) - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1) - ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; GFX9: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; GFX9: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_flat_v2s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1, align 2) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_flat_v2s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load 1, align 4) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load 1, align 2) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load 1) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p0) :: (load 2, align 4) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir index 683c65d927ac..6fca297b910d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir @@ -3778,133 +3778,43 @@ body: | ; SI-LABEL: name: test_load_global_v2s8_align4 ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; SI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1, addrspace 1) - ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1, align 2, addrspace 1) - ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1, addrspace 1) - ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; SI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; SI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; SI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; SI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; SI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-HSA-LABEL: name: test_load_global_v2s8_align4 ; CI-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; CI-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1, addrspace 1) - ; CI-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1, align 2, addrspace 1) - ; CI-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1, addrspace 1) - ; CI-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; CI-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; CI-HSA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; CI-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; CI-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; CI-MESA-LABEL: name: test_load_global_v2s8_align4 ; CI-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; CI-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; CI-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CI-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CI-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1, addrspace 1) - ; CI-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; CI-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CI-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1, align 2, addrspace 1) - ; CI-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; CI-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CI-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1, addrspace 1) - ; CI-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; CI-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; CI-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; CI-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; CI-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; CI-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; CI-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; CI-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; CI-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; CI-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; CI-MESA: $vgpr0 = COPY [[ANYEXT]](s32) ; VI-LABEL: name: test_load_global_v2s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1, addrspace 1) - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1, align 2, addrspace 1) - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1, addrspace 1) - ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; VI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; VI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; VI: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; VI: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; VI: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-HSA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-HSA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; GFX9-HSA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-HSA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-HSA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1, addrspace 1) - ; GFX9-HSA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-HSA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-HSA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1, align 2, addrspace 1) - ; GFX9-HSA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-HSA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-HSA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1, addrspace 1) - ; GFX9-HSA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-HSA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-HSA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-HSA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-HSA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-HSA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; GFX9-HSA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-HSA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; GFX9-HSA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9-HSA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; GFX9-HSA: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-MESA-LABEL: name: test_load_global_v2s8_align4 ; GFX9-MESA: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 - ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1) - ; GFX9-MESA: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; GFX9-MESA: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; GFX9-MESA: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 1, addrspace 1) - ; GFX9-MESA: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; GFX9-MESA: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; GFX9-MESA: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p1) :: (load 1, align 2, addrspace 1) - ; GFX9-MESA: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; GFX9-MESA: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; GFX9-MESA: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p1) :: (load 1, addrspace 1) - ; GFX9-MESA: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32) - ; GFX9-MESA: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32) - ; GFX9-MESA: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32) - ; GFX9-MESA: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32) - ; GFX9-MESA: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) - ; GFX9-MESA: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>) - ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[TRUNC]](<4 x s8>), 0 + ; GFX9-MESA: [[LOAD:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, align 4, addrspace 1) + ; GFX9-MESA: [[EXTRACT:%[0-9]+]]:_(<2 x s8>) = G_EXTRACT [[LOAD]](<4 x s8>), 0 ; GFX9-MESA: [[BITCAST:%[0-9]+]]:_(s16) = G_BITCAST [[EXTRACT]](<2 x s8>) ; GFX9-MESA: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[BITCAST]](s16) ; GFX9-MESA: $vgpr0 = COPY [[ANYEXT]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir index 2deb2e5949d0..a353dd3d2577 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store.mir @@ -385,21 +385,7 @@ body: | ; SI: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s8>) ; SI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF]](<3 x s8>), 0 ; SI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>) - ; SI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>) - ; SI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; SI: G_STORE [[ANYEXT1]](s32), [[COPY]](p1) :: (store 1, align 4, addrspace 1) - ; SI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; SI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; SI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; SI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD]](p1) :: (store 1, addrspace 1) - ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; SI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; SI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; SI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD1]](p1) :: (store 1, align 2, addrspace 1) - ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; SI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; SI: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; SI: G_STORE [[ANYEXT4]](s32), [[PTR_ADD2]](p1) :: (store 1, addrspace 1) + ; SI: G_STORE [[TRUNC]](<4 x s8>), [[COPY]](p1) :: (store 3, align 4, addrspace 1) ; VI-LABEL: name: test_store_global_v3s8_align4 ; VI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 ; VI: [[DEF:%[0-9]+]]:_(<3 x s8>) = G_IMPLICIT_DEF @@ -407,21 +393,7 @@ body: | ; VI: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s8>) ; VI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF]](<3 x s8>), 0 ; VI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>) - ; VI: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[TRUNC]](<4 x s8>) - ; VI: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) - ; VI: G_STORE [[ANYEXT1]](s32), [[COPY]](p1) :: (store 1, align 4, addrspace 1) - ; VI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; VI: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64) - ; VI: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) - ; VI: G_STORE [[ANYEXT2]](s32), [[PTR_ADD]](p1) :: (store 1, addrspace 1) - ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 - ; VI: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; VI: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) - ; VI: G_STORE [[ANYEXT3]](s32), [[PTR_ADD1]](p1) :: (store 1, align 2, addrspace 1) - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 - ; VI: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; VI: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) - ; VI: G_STORE [[ANYEXT4]](s32), [[PTR_ADD2]](p1) :: (store 1, addrspace 1) + ; VI: G_STORE [[TRUNC]](<4 x s8>), [[COPY]](p1) :: (store 3, align 4, addrspace 1) %0:_(p1) = COPY $vgpr0_vgpr1 %1:_(<3 x s8>) = G_IMPLICIT_DEF G_STORE %1, %0 :: (store 3, addrspace 1, align 4)