Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SCEV] Add zext(C + x + ...) -> D + zext(C-D + x + ...)<nuw><nsw> transform
if the top level addition in (D + (C-D + x + ...)) could be proven to not wrap, where the choice of D also maximizes the number of trailing zeroes of (C-D + x + ...), ensuring homogeneous behaviour of the transformation and better canonicalization of such expressions. This enables better canonicalization of expressions like 1 + zext(5 + 20 * %x + 24 * %y) and zext(6 + 20 * %x + 24 * %y), which both get transformed to 2 + zext(4 + 20 * %x + 24 * %y). This pattern is common in address arithmetic, and the transformation makes it easier for passes like LoadStoreVectorizer to prove that 2 or more memory accesses are consecutive and optimize (vectorize) them. Reviewed By: mzolotukhin Differential Revision: https://reviews.llvm.org/D48853 llvm-svn: 337859
- Loading branch information
Showing
3 changed files
with
197 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
78 changes: 78 additions & 0 deletions
78
llvm/test/Transforms/LoadStoreVectorizer/X86/codegenprepare-produced-address-math.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
; RUN: opt -codegenprepare -load-store-vectorizer %s -S -o - | FileCheck %s | ||
; RUN: opt -load-store-vectorizer %s -S -o - | FileCheck %s | ||
; The two opt invocations above check this file twice: once with | ||
; codegenprepare scheduled ahead of the load-store vectorizer, and once with | ||
; the load-store vectorizer alone. Both feed the same set of checks. | ||
|
||
target triple = "x86_64--" | ||
|
||
; %union wraps a single anonymous struct holding two 4x4x4x16 float arrays | ||
; followed by one 10x10x4 float array. | ||
%union = type { { [4 x [4 x [4 x [16 x float]]]], [4 x [4 x [4 x [16 x float]]]], [10 x [10 x [4 x float]]] } } | ||
|
||
; External, 4-byte-aligned global made of a %union followed by 2000 bytes; | ||
; both test functions load from it. | ||
@global_pointer = external unnamed_addr global { %union, [2000 x i8] }, align 4 | ||
|
||
; Two scalar i32 loads whose addresses are typed GEPs computed in the entry | ||
; block. The two indices are (%base & -4) + 4 and (%base & -4) + 5, each | ||
; zero-extended to i64 separately. The checks below require the pair to be | ||
; merged into one <2 x i32> load with no scalar i32 load left over. | ||
; Function Attrs: convergent nounwind | ||
define void @test(i32 %base) #0 { | ||
; CHECK-LABEL: @test( | ||
; CHECK-NOT: load i32 | ||
; CHECK: load <2 x i32> | ||
; CHECK-NOT: load i32 | ||
entry: | ||
; Clear the two low bits of %base, giving a multiple of 4. | ||
%mul331 = and i32 %base, -4 | ||
; First index: %mul331 + 4, widened to i64 after the 32-bit add. | ||
%add350.4 = add i32 4, %mul331 | ||
%idx351.4 = zext i32 %add350.4 to i64 | ||
; Address of float element %idx351.4 reached through struct field 1, i.e. the | ||
; second 4x4x4x16 array of %union, with all outer array indices set to 0. | ||
%arrayidx352.4 = getelementptr inbounds { %union, [2000 x i8] }, { %union, [2000 x i8] }* @global_pointer, i64 0, i32 0, i32 0, i32 1, i64 0, i64 0, i64 0, i64 %idx351.4 | ||
%tmp296.4 = bitcast float* %arrayidx352.4 to i32* | ||
; Second index: %mul331 + 5, one element past the first index. | ||
%add350.5 = add i32 5, %mul331 | ||
%idx351.5 = zext i32 %add350.5 to i64 | ||
%arrayidx352.5 = getelementptr inbounds { %union, [2000 x i8] }, { %union, [2000 x i8] }* @global_pointer, i64 0, i32 0, i32 0, i32 1, i64 0, i64 0, i64 0, i64 %idx351.5 | ||
%tmp296.5 = bitcast float* %arrayidx352.5 to i32* | ||
; The loads are only executed when %base is below 1000 (unsigned compare). | ||
%cnd = icmp ult i32 %base, 1000 | ||
br i1 %cnd, label %loads, label %exit | ||
|
||
loads: | ||
; If and only if the loads are in a different BB from the GEPs codegenprepare | ||
; would try to turn the GEPs into math, which makes LoadStoreVectorizer's job | ||
; harder | ||
%tmp297.4 = load i32, i32* %tmp296.4, align 4, !tbaa !0 | ||
%tmp297.5 = load i32, i32* %tmp296.5, align 4, !tbaa !0 | ||
br label %exit | ||
|
||
exit: | ||
ret void | ||
} | ||
|
||
; Same index computation as @test, but the addresses are formed inside the | ||
; loads block as raw i8 arithmetic (index * 4, plus a constant 4096) instead | ||
; of typed GEPs. NOTE(review): judging by the name and the %sunkaddr values, | ||
; this is presumably @test as rewritten by codegenprepare -- confirm. | ||
; The checks below still require a single <2 x i32> load. | ||
; Function Attrs: convergent nounwind | ||
define void @test.codegenprepared(i32 %base) #0 { | ||
; CHECK-LABEL: @test.codegenprepared( | ||
; CHECK-NOT: load i32 | ||
; CHECK: load <2 x i32> | ||
; CHECK-NOT: load i32 | ||
entry: | ||
; Clear the two low bits of %base, giving a multiple of 4. | ||
%mul331 = and i32 %base, -4 | ||
; Indices %mul331 + 4 and %mul331 + 5, each zero-extended to i64 separately. | ||
%add350.4 = add i32 4, %mul331 | ||
%idx351.4 = zext i32 %add350.4 to i64 | ||
%add350.5 = add i32 5, %mul331 | ||
%idx351.5 = zext i32 %add350.5 to i64 | ||
; The loads are only executed when %base is below 1000 (unsigned compare). | ||
%cnd = icmp ult i32 %base, 1000 | ||
br i1 %cnd, label %loads, label %exit | ||
|
||
loads: ; preds = %entry | ||
; Byte offset of the first element: index * 4 (4-byte float elements). | ||
%sunkaddr = mul i64 %idx351.4, 4 | ||
%sunkaddr1 = getelementptr inbounds i8, i8* bitcast ({ %union, [2000 x i8] }* @global_pointer to i8*), i64 %sunkaddr | ||
; 4096 = 4*4*4*16 floats * 4 bytes, the size of the first array in %union, | ||
; so this offset lands at the start of the second array. | ||
%sunkaddr2 = getelementptr inbounds i8, i8* %sunkaddr1, i64 4096 | ||
%0 = bitcast i8* %sunkaddr2 to i32* | ||
%tmp297.4 = load i32, i32* %0, align 4, !tbaa !0 | ||
; Same address computation for the second index. | ||
%sunkaddr3 = mul i64 %idx351.5, 4 | ||
%sunkaddr4 = getelementptr inbounds i8, i8* bitcast ({ %union, [2000 x i8] }* @global_pointer to i8*), i64 %sunkaddr3 | ||
%sunkaddr5 = getelementptr inbounds i8, i8* %sunkaddr4, i64 4096 | ||
%1 = bitcast i8* %sunkaddr5 to i32* | ||
%tmp297.5 = load i32, i32* %1, align 4, !tbaa !0 | ||
br label %exit | ||
|
||
exit: ; preds = %loads, %entry | ||
ret void | ||
} | ||
|
||
; Attribute group shared by both test functions. | ||
attributes #0 = { convergent nounwind } | ||
|
||
; TBAA metadata attached to every load in this file through !tbaa !0. | ||
; !0 names !1 ("float") twice with offset 0; !1 is parented by | ||
; !2 ("omnipotent char"), which is parented by !3 ("Simple C++ TBAA"). | ||
!0 = !{!1, !1, i64 0} | ||
!1 = !{!"float", !2, i64 0} | ||
!2 = !{!"omnipotent char", !3, i64 0} | ||
!3 = !{!"Simple C++ TBAA"} |