diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index b8761d971a67d..578fd321b4873 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -708,8 +708,32 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
   unsigned Opc = 0;
-  unsigned SrcOpNum = 0;
+  unsigned SrcReg = 0;
   switch (DefMI->getOpcode()) {
+  case AArch64::SUBREG_TO_REG:
+    // Check for the following way to define a 64-bit immediate:
+    //   %0:gpr32 = MOVi32imm 1
+    //   %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
+    if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
+      return 0;
+    if (!DefMI->getOperand(2).isReg())
+      return 0;
+    if (!DefMI->getOperand(3).isImm() ||
+        DefMI->getOperand(3).getImm() != AArch64::sub_32)
+      return 0;
+    DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
+    if (DefMI->getOpcode() != AArch64::MOVi32imm)
+      return 0;
+    // fall-through to MOVi32imm case.
+    [[fallthrough]];
+  case AArch64::MOVi32imm:
+  case AArch64::MOVi64imm:
+    if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
+      return 0;
+    SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
+    Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
+    break;
+
   case AArch64::ADDSXri:
   case AArch64::ADDSWri:
     // if NZCV is used, do not fold.
@@ -724,7 +748,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
         DefMI->getOperand(3).getImm() != 0)
       return 0;
-    SrcOpNum = 1;
+    SrcReg = DefMI->getOperand(1).getReg();
     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
     break;
 
@@ -734,7 +758,7 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
       return 0;
-    SrcOpNum = 2;
+    SrcReg = DefMI->getOperand(2).getReg();
     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
     break;
   }
@@ -753,17 +777,17 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
       return 0;
-    SrcOpNum = 2;
+    SrcReg = DefMI->getOperand(2).getReg();
     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
     break;
   }
   default:
     return 0;
   }
 
-  assert(Opc && SrcOpNum && "Missing parameters");
+  assert(Opc && SrcReg && "Missing parameters");
 
   if (NewVReg)
-    *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
+    *NewVReg = SrcReg;
   return Opc;
 }
 
@@ -976,6 +1000,14 @@ void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
   // Fold the operation. Leave any dead instructions for DCE to clean up.
   if (FoldedOpc) {
+    // NewVReg might be XZR/WZR. In that case create a COPY into a virtual
+    // register.
+    if (!Register::isVirtualRegister(NewVReg)) {
+      unsigned ZeroReg = NewVReg;
+      NewVReg = MRI.createVirtualRegister(RC);
+      BuildMI(MBB, I, DL, get(TargetOpcode::COPY), NewVReg).addReg(ZeroReg);
+    }
+
     FalseReg = NewVReg;
     Opc = FoldedOpc;
 
     // The extends the live range of NewVReg.
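Note on the new canFoldIntoCSel case above: when one arm of the select is the constant 1 (materialized via MOVi32imm/MOVi64imm, possibly wrapped in SUBREG_TO_REG), the select can be emitted as a CSINC of the other arm against the zero register, because the not-taken side of CSINC produces "zero register + 1" == 1. The standalone C++ below is only a sketch of that arithmetic to show why the fold is value-preserving; the names and helpers are illustrative assumptions, not LLVM's API.

    // Standalone sketch (not LLVM code; all names here are illustrative).
    // csinc dst, a, b, cond  ==>  dst = cond ? a : b + 1
    // so "select (x < y), 1, n" can become "csinc dst, n, zero-reg, ge":
    // the false arm of the CSINC yields zero-reg + 1 == 1.
    #include <cstdint>
    #include <iostream>

    static uint64_t csinc(bool cond, uint64_t a, uint64_t b) {
      return cond ? a : b + 1;
    }

    // Models the IR pattern exercised by the new fold_imm1_csinc_* tests.
    static uint64_t select_one_or_n(int64_t x, int64_t y, uint64_t n) {
      const uint64_t zero_reg = 0; // XZR/WZR always read as 0
      // cond is the inverted compare (x >= y), so n is chosen when the
      // original "x < y" condition is false, and 1 otherwise.
      return csinc(x >= y, n, zero_reg);
    }

    int main() {
      std::cout << select_one_or_n(1, 2, 42) << '\n'; // x < y  -> 1
      std::cout << select_one_or_n(3, 2, 42) << '\n'; // x >= y -> 42
    }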
diff --git a/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll b/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
index 97a7741bcde75..849323f0fedf3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-early-ifcvt.ll
@@ -421,3 +421,83 @@ for.body51: ; preds = %is_sbox.exit155
   unreachable
 }
 declare fastcc void @get_switch_type(i32, i32, i16 signext, i16 signext, ptr nocapture) nounwind ssp
+
+; CHECK-LABEL: fold_imm1_csinc_32:
+; CHECK: cmp w0, w1
+; CHECK-NEXT: csinc w0, w2, wzr, ge
+; CHECK-NEXT: ret
+define i32 @fold_imm1_csinc_32(i32 %x, i32 %y, i32 %n) nounwind ssp {
+entry:
+  %cmp = icmp slt i32 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i32 [ 1, %if.then ], [ %n, %if.else ]
+  ret i32 %result
+}
+
+; CHECK-LABEL: fold_imm1_csinc_64:
+; CHECK: cmp x0, x1
+; CHECK-NEXT: csinc x0, x2, xzr, ge
+; CHECK-NEXT: ret
+define i64 @fold_imm1_csinc_64(i64 %x, i64 %y, i64 %n) nounwind ssp {
+entry:
+  %cmp = icmp slt i64 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i64 [ 1, %if.then ], [ %n, %if.else ]
+  ret i64 %result
+}
+
+; CHECK-LABEL: fold_imm1_cset_32:
+; CHECK: cmp w0, w1
+; CHECK-NEXT: cset w0, lt
+; CHECK-NEXT: ret
+define i32 @fold_imm1_cset_32(i32 %x, i32 %y) nounwind ssp {
+entry:
+  %cmp = icmp slt i32 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i32 [ 1, %if.then ], [ 0, %if.else ]
+  ret i32 %result
+}
+
+; CHECK-LABEL: fold_imm1_cset_64:
+; CHECK: cmp x0, x1
+; CHECK-NEXT: cset x0, lt
+; CHECK-NEXT: ret
+define i64 @fold_imm1_cset_64(i64 %x, i64 %y) nounwind ssp {
+entry:
+  %cmp = icmp slt i64 %x, %y
+  br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+  br label %exit
+
+if.else:
+  br label %exit
+
+exit:
+  %result = phi i64 [ 1, %if.then ], [ 0, %if.else ]
+  ret i64 %result
+}
diff --git a/llvm/test/CodeGen/AArch64/peephole-csel.ll b/llvm/test/CodeGen/AArch64/peephole-csel.ll
index 868b9f1f2f6ac..b085258059b7e 100644
--- a/llvm/test/CodeGen/AArch64/peephole-csel.ll
+++ b/llvm/test/CodeGen/AArch64/peephole-csel.ll
@@ -5,10 +5,9 @@ define void @peephole_csel(ptr %dst, i1 %0, i1 %cmp) {
 ; CHECK-LABEL: peephole_csel:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    tst w2, #0x1
-; CHECK-NEXT:    mov w8, #1 // =0x1
-; CHECK-NEXT:    mov x9, xzr
+; CHECK-NEXT:    mov x8, xzr
 ; CHECK-NEXT:    tst w1, #0x1
-; CHECK-NEXT:    csel x8, x8, x9, eq
+; CHECK-NEXT:    csinc x8, x8, xzr, ne
 ; CHECK-NEXT:    str x8, [x0]
 ; CHECK-NEXT:    ret
 entry:
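The fold_imm1_cset_* tests above cover the degenerate case where both arms of the select are constants (1 and 0). The generated code is the same CSINC shape with both source operands being the zero register, which the assembler prints under its cset alias. A small standalone sketch of that arithmetic (again illustrative C++ under assumed names, not LLVM code):

    #include <cstdint>
    #include <iostream>

    // csinc dst, a, b, cond  ==>  dst = cond ? a : b + 1
    static uint64_t csinc(bool cond, uint64_t a, uint64_t b) {
      return cond ? a : b + 1;
    }

    // "select (x < y), 1, 0" == "cset dst, lt" == "csinc dst, xzr, xzr, ge":
    // with both operands reading as 0, the result is simply the condition.
    static uint64_t select_one_or_zero(int64_t x, int64_t y) {
      const uint64_t zero_reg = 0;
      return csinc(x >= y, zero_reg, zero_reg);
    }

    int main() {
      std::cout << select_one_or_zero(1, 2) << '\n'; // x < y  -> 1
      std::cout << select_one_or_zero(3, 2) << '\n'; // x >= y -> 0
    }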