diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp index 22da7ddef98a2..9c7db121fa7fd 100644 --- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp +++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp @@ -128,6 +128,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass { bool visitINSviGPR(MachineInstr &MI, unsigned Opc); bool visitINSvi64lane(MachineInstr &MI); bool visitFMOVDr(MachineInstr &MI); + bool visitCopy(MachineInstr &MI); bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { @@ -690,6 +691,34 @@ bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) { return true; } +// Across a basic-block we might have an i32 extract from a value that only +// operates on upper bits (for example a sxtw). We can replace the COPY with a +// new version skipping the sxtw. +bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) { + Register InputReg = MI.getOperand(1).getReg(); + if (MI.getOperand(1).getSubReg() != AArch64::sub_32 || + !MRI->hasOneNonDBGUse(InputReg)) + return false; + + MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg); + MachineInstr *CopyMI = SrcMI; + while (SrcMI && SrcMI->isFullCopy() && + MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) + SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg()); + + if (!SrcMI || SrcMI->getOpcode() != AArch64::SBFMXri || + SrcMI->getOperand(2).getImm() != 0 || SrcMI->getOperand(3).getImm() != 31) + return false; + + Register SrcReg = SrcMI->getOperand(1).getReg(); + MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg)); + MI.getOperand(1).setReg(SrcReg); + if (CopyMI != SrcMI) + CopyMI->eraseFromParent(); + SrcMI->eraseFromParent(); + return true; +} + bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -771,6 +800,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { case AArch64::FMOVDr: 
Changed |= visitFMOVDr(MI); break; + case AArch64::COPY: + Changed |= visitCopy(MI); + break; } } } diff --git a/llvm/lib/Target/AArch64/peephole-sxtw.mir b/llvm/lib/Target/AArch64/peephole-sxtw.mir new file mode 100644 index 0000000000000..6dd91fbf6ec1d --- /dev/null +++ b/llvm/lib/Target/AArch64/peephole-sxtw.mir @@ -0,0 +1,46 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s + +--- +name: removeSxtw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: removeSxtw + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32 + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY1]], 1, 0 + ; CHECK-NEXT: $w0 = COPY [[ADDWri]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr64 = COPY $x0 + %1:gpr64 = SBFMXri %0:gpr64, 0, 31 + %2:gpr32sp = COPY %1.sub_32:gpr64 + %3:gpr32sp = ADDWri %2:gpr32sp, 1, 0 + $w0 = COPY %3:gpr32sp + RET_ReallyLR implicit $w0 +... +--- +name: extraCopy +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + ; CHECK-LABEL: name: extraCopy + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY [[COPY]].sub_32 + ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY1]], 1, 0 + ; CHECK-NEXT: $w0 = COPY [[ADDWri]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr64 = COPY $x0 + %1:gpr64 = SBFMXri %0:gpr64, 0, 31 + %2:gpr64all = COPY %1:gpr64 + %3:gpr32sp = COPY %2.sub_32:gpr64all + %4:gpr32sp = ADDWri %3:gpr32sp, 1, 0 + $w0 = COPY %4:gpr32sp + RET_ReallyLR implicit $w0 +... 
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll index e41eb7d38c370..058cbbe9ff13c 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll @@ -281,8 +281,7 @@ define i64 @smull_ldrsw_shift(ptr %x0, i64 %x1) { ; CHECK-LABEL: smull_ldrsw_shift: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldrsw x8, [x0] -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: smull x0, w8, w9 +; CHECK-NEXT: smull x0, w8, w1 ; CHECK-NEXT: ret entry: %ext64 = load i32, ptr %x0 @@ -490,8 +489,7 @@ define i64 @smaddl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) { ; CHECK-LABEL: smaddl_ldrsw_shift: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldrsw x8, [x0] -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: smaddl x0, w8, w9, x2 +; CHECK-NEXT: smaddl x0, w8, w1, x2 ; CHECK-NEXT: ret entry: %ext64 = load i32, ptr %x0 @@ -654,8 +652,7 @@ define i64 @smnegl_ldrsw_shift(ptr %x0, i64 %x1) { ; CHECK-LABEL: smnegl_ldrsw_shift: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldrsw x8, [x0] -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: smnegl x0, w8, w9 +; CHECK-NEXT: smnegl x0, w8, w1 ; CHECK-NEXT: ret entry: %ext64 = load i32, ptr %x0 @@ -818,8 +815,7 @@ define i64 @smsubl_ldrsw_shift(ptr %x0, i64 %x1, i64 %x2) { ; CHECK-LABEL: smsubl_ldrsw_shift: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldrsw x8, [x0] -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: smsubl x0, w8, w9, x2 +; CHECK-NEXT: smsubl x0, w8, w1, x2 ; CHECK-NEXT: ret entry: %ext64 = load i32, ptr %x0