diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index e47f25f7828d5..aee6c0dd8a8e0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -499,13 +499,18 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const { LLT DstTy = MRI->getType(DstReg); LLT SrcTy = MRI->getType(SrcReg); const unsigned SrcSize = SrcTy.getSizeInBits(); - const unsigned DstSize = DstTy.getSizeInBits(); + unsigned DstSize = DstTy.getSizeInBits(); // TODO: Should handle any multiple of 32 offset. unsigned Offset = I.getOperand(2).getImm(); if (Offset % 32 != 0 || DstSize > 128) return false; + // 16-bit operations really use 32-bit registers. + // FIXME: Probably should not allow 16-bit G_EXTRACT results. + if (DstSize == 16) + DstSize = 32; + const TargetRegisterClass *DstRC = TRI.getConstrainedRegClassForOperand(I.getOperand(0), *MRI); if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) @@ -728,7 +733,9 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { unsigned InsSize = Src1Ty.getSizeInBits(); int64_t Offset = I.getOperand(3).getImm(); - if (Offset % 32 != 0) + + // FIXME: These cases should have been illegal, making this check unnecessary. + if (Offset % 32 != 0 || InsSize % 32 != 0) return false; unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir index 795ebc6a1a9a6..df16e9c1f0917 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract.mir @@ -255,3 +255,60 @@ body: | S_ENDPGM 0, implicit %1 ...
+ +# FIXME: An s16 G_EXTRACT result probably should not be legal +--- +name: extract_sgpr_s16_from_v4s16_offset0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0 + ; CHECK: S_ENDPGM 0, implicit [[COPY1]] + %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 + %1:sgpr(s16) = G_EXTRACT %0, 0 + S_ENDPGM 0, implicit %1 + +... + +# FIXME: An s16 G_EXTRACT result probably should not be legal +--- +name: extract_sgpr_s16_from_v4s16_offset32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: extract_sgpr_s16_from_v4s16_offset32 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK: S_ENDPGM 0, implicit [[COPY1]] + %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 + %1:sgpr(s16) = G_EXTRACT %0, 32 + S_ENDPGM 0, implicit %1 + +... + +# FIXME: An s16 G_EXTRACT result probably should not be legal +--- +name: extract_sgpr_s16_from_v6s16_offset32 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2 + ; CHECK-LABEL: name: extract_sgpr_s16_from_v6s16_offset32 + ; CHECK: [[COPY:%[0-9]+]]:sgpr_96 = COPY $sgpr0_sgpr1_sgpr2 + ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1 + ; CHECK: S_ENDPGM 0, implicit [[COPY1]] + %0:sgpr(<6 x s16>) = COPY $sgpr0_sgpr1_sgpr2 + %1:sgpr(s16) = G_EXTRACT %0, 32 + S_ENDPGM 0, implicit %1 + +...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir new file mode 100644 index 0000000000000..5e58e8b633ec4 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.xfail.mir @@ -0,0 +1,19 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s + +# FIXME: A G_INSERT of an s16 value should not be legal; delete this test once it is not +# ERR: remark: <unknown>:0:0: cannot select: %3:sgpr(<4 x s16>) = G_INSERT %0:sgpr, %2:sgpr(s16), 0 (in function: insert_sgpr_2s16_to_v4s16_offset0) +--- +name: insert_sgpr_2s16_to_v4s16_offset0 +legalized: true +regBankSelected: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2 + %0:sgpr(<4 x s16>) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = COPY $sgpr2 + %2:sgpr(s16) = G_TRUNC %1 + %3:sgpr(<4 x s16>) = G_INSERT %0, %2, 0 + S_ENDPGM 0, implicit %3 + +...