Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[X86] Don't consider functions ABI compatible for ArgumentPromotion p…
…ass if they view 512-bit vectors differently. The use of the -mprefer-vector-width=256 command line option mixed with functions using vector intrinsics can create situations where one function thinks 512-bit vectors are legal, but another function does not. If a 512-bit vector is passed between them via a pointer, it's possible ArgumentPromotion might try to pass it by value instead. This will result in type legalization for the two functions handling the 512-bit vector differently, leading to runtime failures. Had the 512-bit vector been passed by value from clang codegen, both functions would have been tagged with a min-legal-vector-width=512 function attribute. That would make them be legalized the same way. I observed this issue in 32-bit mode where a union containing a 512-bit vector was being passed by a function that used intrinsics to one that did not. The caller ended up passing in zmm0 and the callee tried to read it from ymm0 and ymm1. The fix implemented here is just to consider it a mismatch if two functions would handle 512-bit vectors differently, without looking at the types that are being considered. This is the easiest and safest fix, but it can be improved in the future. Differential Revision: https://reviews.llvm.org/D58390 llvm-svn: 354376
- Loading branch information
Showing
3 changed files
with
203 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
184 changes: 184 additions & 0 deletions
184
llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,184 @@ | ||
; RUN: opt -S -argpromotion < %s | FileCheck %s | ||
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s | ||
; Test that we only promote arguments when the caller/callee have compatible | ||
; function attributes. | ||
|
||
target triple = "x86_64-unknown-linux-gnu" | ||
|
||
; This should promote: caller and callee both use attributes #0 (+avx512vl, | ||
; min-legal-vector-width=512, prefer-vector-width=512), so the checked signature | ||
; takes the vector by value as %arg1.val instead of through the pointer %arg1. | ||
; CHECK-LABEL: @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64> %arg1.val) | ||
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { | ||
bb: | ||
; %arg1 is only loaded from; the loaded <8 x i64> is copied to %arg. | ||
%tmp = load <8 x i64>, <8 x i64>* %arg1 | ||
store <8 x i64> %tmp, <8 x i64>* %arg | ||
ret void | ||
} | ||
|
||
; Caller (attributes #0). Hands two stack slots to the internal callee by pointer | ||
; (%tmp2 as destination, %tmp as source) and then copies %tmp2 out to %arg. | ||
define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg) #0 { | ||
bb: | ||
%tmp = alloca <8 x i64>, align 32 | ||
%tmp2 = alloca <8 x i64>, align 32 | ||
%tmp3 = bitcast <8 x i64>* %tmp to i8* | ||
; NOTE(review): the memset length operand is 32, while an <8 x i64> slot is 64 bytes - confirm intended. | ||
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) | ||
call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp) | ||
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 | ||
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 | ||
ret void | ||
} | ||
|
||
; This should promote: caller and callee both use attributes #1 (+avx512vl, | ||
; min-legal-vector-width=512, prefer-vector-width=256), so the checked signature | ||
; takes the vector by value as %arg1.val instead of through the pointer %arg1. | ||
; CHECK-LABEL: @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val) | ||
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { | ||
bb: | ||
; %arg1 is only loaded from; the loaded <8 x i64> is copied to %arg. | ||
%tmp = load <8 x i64>, <8 x i64>* %arg1 | ||
store <8 x i64> %tmp, <8 x i64>* %arg | ||
ret void | ||
} | ||
|
||
; Caller (attributes #1). Hands two stack slots to the internal callee by pointer | ||
; (%tmp2 as destination, %tmp as source) and then copies %tmp2 out to %arg. | ||
define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #1 { | ||
bb: | ||
%tmp = alloca <8 x i64>, align 32 | ||
%tmp2 = alloca <8 x i64>, align 32 | ||
%tmp3 = bitcast <8 x i64>* %tmp to i8* | ||
; NOTE(review): the memset length operand is 32, while an <8 x i64> slot is 64 bytes - confirm intended. | ||
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) | ||
call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) | ||
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 | ||
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 | ||
ret void | ||
} | ||
|
||
; This should promote: the caller (#0) and this callee (#1) differ only in | ||
; prefer-vector-width (512 vs 256); both have +avx512vl and | ||
; min-legal-vector-width=512, so the checked signature takes %arg1.val by value. | ||
; CHECK-LABEL: @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val) | ||
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { | ||
bb: | ||
; %arg1 is only loaded from; the loaded <8 x i64> is copied to %arg. | ||
%tmp = load <8 x i64>, <8 x i64>* %arg1 | ||
store <8 x i64> %tmp, <8 x i64>* %arg | ||
ret void | ||
} | ||
|
||
; Caller (attributes #0) of a callee that uses attributes #1. Hands two stack slots | ||
; to the callee by pointer (%tmp2 as destination, %tmp as source) and then copies | ||
; %tmp2 out to %arg. | ||
define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg) #0 { | ||
bb: | ||
%tmp = alloca <8 x i64>, align 32 | ||
%tmp2 = alloca <8 x i64>, align 32 | ||
%tmp3 = bitcast <8 x i64>* %tmp to i8* | ||
; NOTE(review): the memset length operand is 32, while an <8 x i64> slot is 64 bytes - confirm intended. | ||
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) | ||
call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) | ||
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 | ||
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 | ||
ret void | ||
} | ||
|
||
; This should promote: the caller (#1) and this callee (#0) differ only in | ||
; prefer-vector-width (256 vs 512); both have +avx512vl and | ||
; min-legal-vector-width=512, so the checked signature takes %arg1.val by value. | ||
; CHECK-LABEL: @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64> %arg1.val) | ||
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 { | ||
bb: | ||
; %arg1 is only loaded from; the loaded <8 x i64> is copied to %arg. | ||
%tmp = load <8 x i64>, <8 x i64>* %arg1 | ||
store <8 x i64> %tmp, <8 x i64>* %arg | ||
ret void | ||
} | ||
|
||
; Caller (attributes #1) of a callee that uses attributes #0. Hands two stack slots | ||
; to the callee by pointer (%tmp2 as destination, %tmp as source) and then copies | ||
; %tmp2 out to %arg. | ||
define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg) #1 { | ||
bb: | ||
%tmp = alloca <8 x i64>, align 32 | ||
%tmp2 = alloca <8 x i64>, align 32 | ||
%tmp3 = bitcast <8 x i64>* %tmp to i8* | ||
; NOTE(review): the memset length operand is 32, while an <8 x i64> slot is 64 bytes - confirm intended. | ||
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) | ||
call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %tmp2, <8 x i64>* %tmp) | ||
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 | ||
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 | ||
ret void | ||
} | ||
|
||
; This should not promote: both functions have +avx512vl, but the caller (#2) has | ||
; min-legal-vector-width=256 while this callee (#1) has 512, so the checked | ||
; signature still takes the vector through the pointer %arg1. | ||
; CHECK-LABEL: @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) | ||
define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 { | ||
bb: | ||
; %arg1 is only loaded from; the loaded <8 x i64> is copied to %arg. | ||
%tmp = load <8 x i64>, <8 x i64>* %arg1 | ||
store <8 x i64> %tmp, <8 x i64>* %arg | ||
ret void | ||
} | ||
|
||
; Caller (attributes #2) of a callee that uses attributes #1. Hands two stack slots | ||
; to the callee by pointer (%tmp2 as destination, %tmp as source) and then copies | ||
; %tmp2 out to %arg. | ||
define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg) #2 { | ||
bb: | ||
%tmp = alloca <8 x i64>, align 32 | ||
%tmp2 = alloca <8 x i64>, align 32 | ||
%tmp3 = bitcast <8 x i64>* %tmp to i8* | ||
; NOTE(review): the memset length operand is 32, while an <8 x i64> slot is 64 bytes - confirm intended. | ||
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) | ||
call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) | ||
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 | ||
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 | ||
ret void | ||
} | ||
|
||
; This should not promote: both functions have +avx512vl, but the caller (#1) has | ||
; min-legal-vector-width=512 while this callee (#2) has 256, so the checked | ||
; signature still takes the vector through the pointer %arg1. | ||
; CHECK-LABEL: @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) | ||
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #2 { | ||
bb: | ||
; %arg1 is only loaded from; the loaded <8 x i64> is copied to %arg. | ||
%tmp = load <8 x i64>, <8 x i64>* %arg1 | ||
store <8 x i64> %tmp, <8 x i64>* %arg | ||
ret void | ||
} | ||
|
||
; Caller (attributes #1) of a callee that uses attributes #2. Hands two stack slots | ||
; to the callee by pointer (%tmp2 as destination, %tmp as source) and then copies | ||
; %tmp2 out to %arg. | ||
define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %arg) #1 { | ||
bb: | ||
%tmp = alloca <8 x i64>, align 32 | ||
%tmp2 = alloca <8 x i64>, align 32 | ||
%tmp3 = bitcast <8 x i64>* %tmp to i8* | ||
; NOTE(review): the memset length operand is 32, while an <8 x i64> slot is 64 bytes - confirm intended. | ||
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) | ||
call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) | ||
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 | ||
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 | ||
ret void | ||
} | ||
|
||
; This should promote: the caller (#4) and this callee (#3) disagree on | ||
; min-legal-vector-width (256 vs 512), but both only have +avx2. Presumably neither | ||
; treats 512-bit vectors as legal without AVX512, so they agree - TODO confirm. | ||
; CHECK-LABEL: @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val) | ||
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 { | ||
bb: | ||
; %arg1 is only loaded from; the loaded <8 x i64> is copied to %arg. | ||
%tmp = load <8 x i64>, <8 x i64>* %arg1 | ||
store <8 x i64> %tmp, <8 x i64>* %arg | ||
ret void | ||
} | ||
|
||
; Caller (attributes #4) of a callee that uses attributes #3. Hands two stack slots | ||
; to the callee by pointer (%tmp2 as destination, %tmp as source) and then copies | ||
; %tmp2 out to %arg. | ||
define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg) #4 { | ||
bb: | ||
%tmp = alloca <8 x i64>, align 32 | ||
%tmp2 = alloca <8 x i64>, align 32 | ||
%tmp3 = bitcast <8 x i64>* %tmp to i8* | ||
; NOTE(review): the memset length operand is 32, while an <8 x i64> slot is 64 bytes - confirm intended. | ||
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) | ||
call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) | ||
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 | ||
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 | ||
ret void | ||
} | ||
|
||
; This should promote: the caller (#3) and this callee (#4) disagree on | ||
; min-legal-vector-width (512 vs 256), but both only have +avx2. Presumably neither | ||
; treats 512-bit vectors as legal without AVX512, so they agree - TODO confirm. | ||
; CHECK-LABEL: @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64> %arg1.val) | ||
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 { | ||
bb: | ||
; %arg1 is only loaded from; the loaded <8 x i64> is copied to %arg. | ||
%tmp = load <8 x i64>, <8 x i64>* %arg1 | ||
store <8 x i64> %tmp, <8 x i64>* %arg | ||
ret void | ||
} | ||
|
||
; Caller (attributes #3) of a callee that uses attributes #4. Hands two stack slots | ||
; to the callee by pointer (%tmp2 as destination, %tmp as source) and then copies | ||
; %tmp2 out to %arg. | ||
define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg) #3 { | ||
bb: | ||
%tmp = alloca <8 x i64>, align 32 | ||
%tmp2 = alloca <8 x i64>, align 32 | ||
%tmp3 = bitcast <8 x i64>* %tmp to i8* | ||
; NOTE(review): the memset length operand is 32, while an <8 x i64> slot is 64 bytes - confirm intended. | ||
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false) | ||
call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %tmp2, <8 x i64>* %tmp) | ||
%tmp4 = load <8 x i64>, <8 x i64>* %tmp2, align 32 | ||
store <8 x i64> %tmp4, <8 x i64>* %arg, align 2 | ||
ret void | ||
} | ||
|
||
; Declaration of the memset intrinsic that every caller in this file uses on its | ||
; %tmp stack slot before calling its callee. | ||
; Function Attrs: argmemonly nounwind | ||
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #5 | ||
|
||
; Attribute groups referenced by the functions in this file. Groups #0-#4 vary only | ||
; in "target-features", "min-legal-vector-width" and "prefer-vector-width": | ||
;   #0: +avx512vl, min-legal 512, prefer 512 | ||
;   #1: +avx512vl, min-legal 512, prefer 256 | ||
;   #2: +avx512vl, min-legal 256, prefer 256 | ||
;   #3: +avx2,     min-legal 512, prefer 256 | ||
;   #4: +avx2,     min-legal 256, prefer 256 | ||
;   #5: used only by the llvm.memset declaration | ||
attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" } | ||
attributes #1 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="256" } | ||
attributes #2 = { inlinehint norecurse nounwind uwtable "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" } | ||
attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="512" "prefer-vector-width"="256" } | ||
attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" } | ||
attributes #5 = { argmemonly nounwind } |