Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[llvm][CodeGen] DAG Combiner folds for vscale.
Summary: This patch simplifies the DAGs generated when using the intrinsic `@llvm.vscale.*` as follows: * Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)). * Canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C)). * Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)). * Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)). The test `sve-gep.ll` has been updated to reflect the folding introduced by this patch. Reviewers: efriedma, sdesmalen, andwar, rengolin Reviewed By: sdesmalen Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D74782
- Loading branch information
Francesco Petrogalli
committed
Feb 21, 2020
1 parent
b178555
commit 31ec721
Showing
3 changed files
with
129 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve --asm-verbose=false < %s |FileCheck %s | ||
|
||
declare i32 @llvm.vscale.i32() | ||
declare i64 @llvm.vscale.i64() | ||
|
||
; DAG combine under test: (add (vscale * C0), (vscale * C1)) ==> (vscale * (C0 + C1)). | ||
; Here C0 = C1 = 1: the i64 sum vscale + vscale is expected to be emitted as a | ||
; single `cntd` with no separate add instruction. | ||
define i64 @combine_add_vscale_i64() nounwind { | ||
; CHECK-LABEL: combine_add_vscale_i64: | ||
; CHECK-NOT: add | ||
; CHECK-NEXT: cntd x0 | ||
; CHECK-NEXT: ret | ||
  ; Read vscale once and add it to itself. | ||
  %vs = call i64 @llvm.vscale.i64() | ||
  %twice = add i64 %vs, %vs | ||
  ret i64 %twice | ||
} | ||
|
||
; i32 flavour of the fold (add (vscale * C0), (vscale * C1)) ==> (vscale * (C0 + C1)), | ||
; with C0 = C1 = 1. The expected output is one `cntd` followed by the return. | ||
define i32 @combine_add_vscale_i32() nounwind { | ||
; CHECK-LABEL: combine_add_vscale_i32: | ||
; CHECK-NOT: add | ||
; CHECK-NEXT: cntd x0 | ||
; CHECK-NEXT: ret | ||
  ; Read the 32-bit vscale once and add it to itself. | ||
  %vs = call i32 @llvm.vscale.i32() | ||
  %twice = add i32 %vs, %vs | ||
  ret i32 %twice | ||
} | ||
|
||
; DAG combine under test: (mul (vscale * C0), C1) ==> (vscale * (C0 * C1)). | ||
; For this i64 case C0 = 1 and C1 = 32, so the product is 32 * vscale, which is | ||
; expected to be materialised by `rdvl` with immediate #2 and no multiply. | ||
define i64 @combine_mul_vscale_i64() nounwind { | ||
; CHECK-LABEL: combine_mul_vscale_i64: | ||
; CHECK-NOT: mul | ||
; CHECK-NEXT: rdvl x0, #2 | ||
; CHECK-NEXT: ret | ||
  ; Scale vscale by the constant 32. | ||
  %vs = call i64 @llvm.vscale.i64() | ||
  %scaled = mul i64 %vs, 32 | ||
  ret i64 %scaled | ||
} | ||
|
||
; i32 flavour of the fold (mul (vscale * C0), C1) ==> (vscale * (C0 * C1)). | ||
; For this case C0 = 1 and C1 = 48, so the product is 48 * vscale, which is | ||
; expected to be materialised by `rdvl` with immediate #3 and no multiply. | ||
define i32 @combine_mul_vscale_i32() nounwind { | ||
; CHECK-LABEL: combine_mul_vscale_i32: | ||
; CHECK-NOT: mul | ||
; CHECK-NEXT: rdvl x0, #3 | ||
; CHECK-NEXT: ret | ||
  ; Scale the 32-bit vscale by the constant 48. | ||
  %vs = call i32 @llvm.vscale.i32() | ||
  %scaled = mul i32 %vs, 48 | ||
  ret i32 %scaled | ||
} | ||
|
||
; Canonicalisation under test: (sub X, (vscale * C)) ==> (add X, (vscale * -C)). | ||
; With C = 1 the expected sequence builds the negated vscale (`rdvl` #-1, then an | ||
; arithmetic shift right by 4) and adds it to the i64 argument; no sub is emitted. | ||
define i64 @combine_sub_vscale_i64(i64 %in) nounwind { | ||
; CHECK-LABEL: combine_sub_vscale_i64: | ||
; CHECK-NOT: sub | ||
; CHECK-NEXT: rdvl x8, #-1 | ||
; CHECK-NEXT: asr x8, x8, #4 | ||
; CHECK-NEXT: add x0, x0, x8 | ||
; CHECK-NEXT: ret | ||
  ; Subtract vscale from the incoming argument. | ||
  %vs = call i64 @llvm.vscale.i64() | ||
  %diff = sub i64 %in, %vs | ||
  ret i64 %diff | ||
} | ||
|
||
; i32 flavour of the canonicalisation (sub X, (vscale * C)) ==> (add X, (vscale * -C)). | ||
; With C = 1 the expected sequence builds the negated vscale in x8 (`rdvl` #-1, then | ||
; an arithmetic shift right by 4) and finishes with a 32-bit add on the w registers. | ||
define i32 @combine_sub_vscale_i32(i32 %in) nounwind { | ||
; CHECK-LABEL: combine_sub_vscale_i32: | ||
; CHECK-NOT: sub | ||
; CHECK-NEXT: rdvl x8, #-1 | ||
; CHECK-NEXT: asr x8, x8, #4 | ||
; CHECK-NEXT: add w0, w0, w8 | ||
; CHECK-NEXT: ret | ||
  ; Subtract the 32-bit vscale from the incoming argument. | ||
  %vs = call i32 @llvm.vscale.i32() | ||
  %diff = sub i32 %in, %vs | ||
  ret i32 %diff | ||
} | ||
|
||
; DAG combine under test: (shl (vscale * C0), C1) ==> (vscale * (C0 << C1)). | ||
; Constants used here: C0 = 1, C1 = 4. | ||
; IR view:       the shifted value equals 2^4 * VSCALE. | ||
; Assembly view: RDVL with immediate 1 also produces 2^4 * VSCALE. | ||
; The expected instruction is therefore `rdvl` with immediate #1. | ||
define i64 @combine_shl_vscale_i64() nounwind { | ||
; CHECK-LABEL: combine_shl_vscale_i64: | ||
; CHECK-NOT: shl | ||
; CHECK-NEXT: rdvl x0, #1 | ||
; CHECK-NEXT: ret | ||
  ; Shift vscale left by the constant 4. | ||
  %vs = call i64 @llvm.vscale.i64() | ||
  %shifted = shl i64 %vs, 4 | ||
  ret i64 %shifted | ||
} | ||
|
||
; i32 flavour of the fold (shl (vscale * C0), C1) ==> (vscale * (C0 << C1)), | ||
; with C0 = 1 and C1 = 4. The shifted value is 2^4 * VSCALE, which is expected to | ||
; be produced by `rdvl` with immediate #1 and no shift instruction. | ||
define i32 @combine_shl_vscale_i32() nounwind { | ||
; CHECK-LABEL: combine_shl_vscale_i32: | ||
; CHECK-NOT: shl | ||
; CHECK-NEXT: rdvl x0, #1 | ||
; CHECK-NEXT: ret | ||
  ; Shift the 32-bit vscale left by the constant 4. | ||
  %vs = call i32 @llvm.vscale.i32() | ||
  %shifted = shl i32 %vs, 4 | ||
  ret i32 %shifted | ||
} |