Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions llvm/lib/Target/AArch64/AArch64Features.td
Original file line number Diff line number Diff line change
Expand Up @@ -612,8 +612,11 @@ def FeatureExperimentalZeroingPseudos
def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r",
"NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">;

def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it possible that there are more places that need to be updated (like TargetParser)? I'm now seeing a bunch of "'+zcm' is not a recognized feature for this target (ignoring feature)" warnings when building anything with Clang on macOS.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I will push a fix.

"Has zero-cycle register moves">;
// Zero-cycle register-move features, declared separately per register class
// (GPR64, GPR32, FPR64). Each sets its own boolean subtarget field to "true"
// and is exposed under its own feature string (zcm-gpr64, zcm-gpr32,
// zcm-fpr64).
def FeatureZCRegMoveGPR64 : SubtargetFeature<"zcm-gpr64", "HasZeroCycleRegMoveGPR64", "true",
"Has zero-cycle register moves for GPR64 registers">;

def FeatureZCRegMoveGPR32 : SubtargetFeature<"zcm-gpr32", "HasZeroCycleRegMoveGPR32", "true",
"Has zero-cycle register moves for GPR32 registers">;

def FeatureZCRegMoveFPR64 : SubtargetFeature<"zcm-fpr64", "HasZeroCycleRegMoveFPR64", "true",
"Has zero-cycle register moves for FPR64 registers">;
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5037,7 +5037,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,

if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
// If either operand is WSP, expand to ADD #0.
if (Subtarget.hasZeroCycleRegMove()) {
if (Subtarget.hasZeroCycleRegMoveGPR64() &&
!Subtarget.hasZeroCycleRegMoveGPR32()) {
// Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
MCRegister DestRegX = TRI->getMatchingSuperReg(
DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
Expand All @@ -5063,7 +5064,8 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addImm(0)
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
} else {
if (Subtarget.hasZeroCycleRegMove()) {
if (Subtarget.hasZeroCycleRegMoveGPR64() &&
!Subtarget.hasZeroCycleRegMoveGPR32()) {
// Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These comments would be good to keep, I think.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will restore them

MCRegister DestRegX = TRI->getMatchingSuperReg(
DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
Expand Down
20 changes: 10 additions & 10 deletions llvm/lib/Target/AArch64/AArch64Processors.td
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
FeatureDisableLatencySchedHeuristic,
FeatureFuseAES, FeatureFuseCryptoEOR,
FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing,
FeatureZCZeroingFPWorkaround]>;
Expand All @@ -325,7 +325,7 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
FeatureFuseAES,
FeatureFuseCryptoEOR,
FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;

Expand All @@ -338,7 +338,7 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
FeatureFuseAES,
FeatureFuseCryptoEOR,
FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;

Expand All @@ -351,7 +351,7 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
FeatureFuseAES,
FeatureFuseCryptoEOR,
FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;

Expand All @@ -364,7 +364,7 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
FeatureFuseAES,
FeatureFuseCryptoEOR,
FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;

Expand All @@ -382,7 +382,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;

Expand All @@ -400,7 +400,7 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;

Expand All @@ -418,7 +418,7 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;

Expand All @@ -436,7 +436,7 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing]>;

Expand All @@ -453,7 +453,7 @@ def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
FeatureFuseCCSelect,
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
FeatureZCRegMove,
FeatureZCRegMoveGPR64,
FeatureZCRegMoveFPR64,
FeatureZCZeroing
]>;
Expand Down
54 changes: 54 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
; RUN: llc < %s -mtriple=arm64-linux-gnu | FileCheck %s -check-prefixes=NOTCPU-LINUX --match-full-lines
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=generic | FileCheck %s -check-prefixes=NOTCPU-APPLE --match-full-lines
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 | FileCheck %s -check-prefixes=CPU --match-full-lines
; RUN: llc < %s -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-gpr64 | FileCheck %s -check-prefixes=NOTATTR --match-full-lines
; RUN: llc < %s -mtriple=arm64-apple-macosx -mattr=+zcm-gpr64 | FileCheck %s -check-prefixes=ATTR --match-full-lines

; Calls foo_i32 twice with the same two i32 arguments (%c, %d), so each value
; has to be copied into an argument register and also kept in another register
; across the first call. The check groups below pin the register width used for
; those copies under each configuration: the apple-m1 and +zcm-gpr64 groups
; expect x-register moves, the remaining groups expect w-register moves.
; NOTE(review): the invocations visible at the top of this test enable only the
; NOTCPU-LINUX, NOTCPU-APPLE, CPU, NOTATTR and ATTR prefixes, so the CHECK-LABEL
; directive below appears never to be evaluated, and its label `t` does not
; match this function's name - confirm, then replace it with per-prefix labels.
define void @zero_cycle_regmov_GPR32(i32 %a, i32 %b, i32 %c, i32 %d) {
entry:
; CHECK-LABEL: t:
; NOTCPU-LINUX: mov w0, w2
; NOTCPU-LINUX: mov w1, w3
; NOTCPU-LINUX: mov [[REG2:w[0-9]+]], w3
; NOTCPU-LINUX: mov [[REG1:w[0-9]+]], w2
; NOTCPU-LINUX-NEXT: bl {{_?foo_i32}}
; NOTCPU-LINUX: mov w0, [[REG1]]
; NOTCPU-LINUX: mov w1, [[REG2]]

; NOTCPU-APPLE: mov w0, w2
; NOTCPU-APPLE: mov w1, w3
; NOTCPU-APPLE: mov [[REG2:w[0-9]+]], w3
; NOTCPU-APPLE: mov [[REG1:w[0-9]+]], w2
; NOTCPU-APPLE-NEXT: bl {{_?foo_i32}}
; NOTCPU-APPLE: mov w0, [[REG1]]
; NOTCPU-APPLE: mov w1, [[REG2]]

; CPU: mov [[REG2:x[0-9]+]], x3
; CPU: mov [[REG1:x[0-9]+]], x2
; CPU: mov x0, x2
; CPU: mov x1, x3
; CPU-NEXT: bl {{_?foo_i32}}
; CPU: mov x0, [[REG1]]
; CPU: mov x1, [[REG2]]

; NOTATTR: mov [[REG2:w[0-9]+]], w3
; NOTATTR: mov [[REG1:w[0-9]+]], w2
; NOTATTR: mov w0, w2
; NOTATTR: mov w1, w3
; NOTATTR-NEXT: bl {{_?foo_i32}}
; NOTATTR: mov w0, [[REG1]]
; NOTATTR: mov w1, [[REG2]]

; ATTR: mov x0, x2
; ATTR: mov x1, x3
; ATTR: mov [[REG2:x[0-9]+]], x3
; ATTR: mov [[REG1:x[0-9]+]], x2
; ATTR-NEXT: bl {{_?foo_i32}}
; ATTR: mov x0, [[REG1]]
; ATTR: mov x1, [[REG2]]
; Two identical calls; neither result is used afterwards.
%call = call i32 @foo_i32(i32 %c, i32 %d)
%call1 = call i32 @foo_i32(i32 %c, i32 %d)
unreachable
}

declare i32 @foo_i32(i32, i32)
45 changes: 0 additions & 45 deletions llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov-gpr32.ll

This file was deleted.