Skip to content

Commit

Permalink
[AArch64] Enable FeatureFuseAES for the generic processor model.
Browse files Browse the repository at this point in the history
Summary:
Scheduling AESE/AESMC and AESD/AESIMC instruction pairs back-to-back
gives a double-digit speedup on benchmarks using those instructions on
Cortex-A processors. In GCC, this optimization is part of the generic
processor model as well.

This change should not have a major performance impact on processors
that do not optimize AES instruction pairs, although I only had access
to Cortex-A processors for benchmarking.


Reviewers: rengolin, kristof.beyls, javed.absar, evandro, silviu.baranga, MatzeB, mcrosier, joelkevinjones, joel_k_jones, bmakam, t.p.northover

Reviewed By: evandro

Subscribers: sbaranga, aemerson, llvm-commits

Differential Revision: https://reviews.llvm.org/D33836

llvm-svn: 305457
  • Loading branch information
fhahn committed Jun 15, 2017
1 parent d078254 commit 0a26d2c
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 36 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/AArch64/AArch64.td
Expand Up @@ -362,6 +362,7 @@ def ProcThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",

def : ProcessorModel<"generic", NoSchedModel, [
FeatureFPARMv8,
FeatureFuseAES,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler
Expand Down
77 changes: 41 additions & 36 deletions llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
@@ -1,7 +1,9 @@
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKCORTEX
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKCORTEX
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKCORTEX
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKCORTEX
; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-aes,+crypto | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSEALLPAIRS
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKM1

declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d, <16 x i8> %k)
Expand Down Expand Up @@ -74,22 +76,23 @@ define void @aesea(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
ret void

; CHECK-LABEL: aesea:
; CHECKCORTEX: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VA]]
; CHECKCORTEX: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VB]]
; CHECKCORTEX: aese [[VC:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VC]]
; CHECKCORTEX: aese [[VD:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VD]]
; CHECKCORTEX: aese [[VE:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VE]]
; CHECKCORTEX: aese [[VF:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VF]]
; CHECKCORTEX: aese [[VG:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VG]]
; CHECKCORTEX: aese [[VH:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VH]]
; CHECKFUSEALLPAIRS: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VA]]
; CHECKFUSEALLPAIRS: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VB]]
; CHECKFUSEALLPAIRS: aese [[VC:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VC]]
; CHECKFUSEALLPAIRS: aese [[VD:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VD]]
; CHECKFUSEALLPAIRS: aese [[VE:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VE]]
; CHECKFUSEALLPAIRS: aese [[VF:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VF]]
; CHECKFUSEALLPAIRS: aese [[VG:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VG]]
; CHECKFUSEALLPAIRS: aese [[VH:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesmc {{v[0-7].16b}}, [[VH]]
; CHECKFUSEALLPAIRS-NOT: aesmc

; CHECKM1: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VA]]
Expand Down Expand Up @@ -175,22 +178,23 @@ define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d,
ret void

; CHECK-LABEL: aesda:
; CHECKCORTEX: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VA]]
; CHECKCORTEX: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VB]]
; CHECKCORTEX: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VC]]
; CHECKCORTEX: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VD]]
; CHECKCORTEX: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VE]]
; CHECKCORTEX: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VF]]
; CHECKCORTEX: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VG]]
; CHECKCORTEX: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}}
; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VH]]
; CHECKFUSEALLPAIRS: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VA]]
; CHECKFUSEALLPAIRS: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VB]]
; CHECKFUSEALLPAIRS: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VC]]
; CHECKFUSEALLPAIRS: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VD]]
; CHECKFUSEALLPAIRS: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VE]]
; CHECKFUSEALLPAIRS: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VF]]
; CHECKFUSEALLPAIRS: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VG]]
; CHECKFUSEALLPAIRS: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}}
; CHECKFUSEALLPAIRS-NEXT: aesimc {{v[0-7].16b}}, [[VH]]
; CHECKFUSEALLPAIRS-NOT: aesimc

; CHECKM1: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VA]]
Expand Down Expand Up @@ -236,4 +240,5 @@ entry:
; CHECK-NEXT: aesmc {{v[0-7].16b}}, [[VA]]
; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc {{v[0-7].16b}}, [[VB]]
; CHECK-NOT: aesmc
}

0 comments on commit 0a26d2c

Please sign in to comment.