Skip to content

Commit

Permalink
[AArch64] Add new subtarget feature to fuse AES crypto operations
Browse files Browse the repository at this point in the history
This feature enables the fusion of AES crypto operations on Cortex-A57, as
recommended in section 4.13 of its Software Optimisation Guide, and on Exynos
M1.

Differential revision: https://reviews.llvm.org/D28491

llvm-svn: 293738
  • Loading branch information
Evandro Menezes committed Feb 1, 2017
1 parent 94edf02 commit b21fb29
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 2 deletions.
6 changes: 6 additions & 0 deletions llvm/lib/Target/AArch64/AArch64.td
Expand Up @@ -103,6 +103,10 @@ def FeatureArithmeticCbzFusion : SubtargetFeature<
"arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
"CPU fuses arithmetic + cbz/cbnz operations">;

// Subtarget feature "fuse-aes": sets the AArch64Subtarget field HasFuseAES to
// true. The macro-fusion code queries it through hasFuseAES() to decide whether
// AES crypto instruction pairs should be scheduled adjacently.
def FeatureFuseAES : SubtargetFeature<
"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;

def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
Expand Down Expand Up @@ -184,6 +188,7 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
FeatureFuseAES,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
Expand Down Expand Up @@ -230,6 +235,7 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
FeatureFuseAES,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
Expand Up @@ -116,6 +116,19 @@ static bool shouldScheduleAdjacent(const AArch64InstrInfo &TII,
return true;
}

// Only considered when the subtarget reports the "fuse-aes" feature
// (HasFuseAES); otherwise control falls through to the final `return false`.
if (ST.hasFuseAES())
// Fuse AES crypto operations.
// Opcodes other than AESErr/AESDrr match no case and fall out of the switch.
switch(FirstOpcode) {
// AES encode.
// AESE pairs with a following AESMC.
// NOTE(review): INSTRUCTION_LIST_END presumably means "no specific second
// instruction was supplied" (wildcard) -- confirm against the callers.
case AArch64::AESErr:
return SecondOpcode == AArch64::AESMCrr ||
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
// AES decode.
// AESD pairs with a following AESIMC; same wildcard handling as above.
case AArch64::AESDrr:
return SecondOpcode == AArch64::AESIMCrr ||
SecondOpcode == AArch64::INSTRUCTION_LIST_END;
}

return false;
}

Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AArch64/AArch64SchedA57.td
Expand Up @@ -162,7 +162,9 @@ def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>;
// Cryptography Extensions
// -----------------------------------------------------------------------------

def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
def A57ReadAES : SchedReadAdvance<3, [A57Write_3cyc_1W]>;
def : InstRW<[A57Write_3cyc_1W], (instregex "^AES[DE]")>;
def : InstRW<[A57Write_3cyc_1W, A57ReadAES], (instregex "^AESI?MC")>;
def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64SchedM1.td
Expand Up @@ -366,7 +366,8 @@ def : InstRW<[M1WriteNALU1], (instregex "^ZIP[12]v")>;
// Cryptography instructions.
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; }
def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>;
def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AES")>;
def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>;
def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>;

def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>;
def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>;
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64Subtarget.h
Expand Up @@ -84,6 +84,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
bool HasFuseAES = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
uint8_t MaxInterleaveFactor = 2;
Expand Down Expand Up @@ -197,6 +198,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
}
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
bool hasFuseAES() const { return HasFuseAES; }
bool useRSqrt() const { return UseRSqrt; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
Expand Down

0 comments on commit b21fb29

Please sign in to comment.