Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Scalarizer] Fix a non-deterministic scatter order problem
Summary: The indexing operator in Scatterer may result in building new instructions. When using multiple such operators in a function argument list, the order in which we build instructions depends on the argument evaluation order (which is undefined in C++). This patch avoids such problems by expanding the components using the [] operator prior to the function call. The problem was seen when comparing output while building LLVM with different compilers (clang vs gcc). Reviewers: foad, cameron.mcinally, uabelho Reviewed By: foad Subscribers: hiraditya, mgrang, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78455
- Loading branch information
Showing
2 changed files
with
107 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | ||
; RUN: opt %s -scalarizer -scalarize-load-store -S | FileCheck %s | ||
; RUN: opt %s -passes='function(scalarizer)' -scalarize-load-store -S | FileCheck %s | ||
|
||
; This verifies that the order of extract element instructions is | ||
; deterministic. In the past we could end up with different results depending | ||
; on the compiler used (due to argument evaluation order being undefined in | ||
; C++). The order of the extracts is not really important for correctness of | ||
; the result, but when debugging and creating test cases it is helpful if we | ||
; get the same output regardless of which compiler we use when building the | ||
; compiler. | ||
|
||
; test1: vector select with a scalar i1 condition and <2 x i32> operands. | ||
; The checks pin the per-lane emission order: the extract from %i comes | ||
; first, then the extract from %j, then the scalar select for that lane. | ||
define <2 x i32> @test1(i1 %b, <2 x i32> %i, <2 x i32> %j) { | ||
; CHECK-LABEL: @test1( | ||
; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 | ||
; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 | ||
; CHECK-NEXT: [[RES_I0:%.*]] = select i1 [[B:%.*]], i32 [[I_I0]], i32 [[J_I0]] | ||
; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 | ||
; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 | ||
; CHECK-NEXT: [[RES_I1:%.*]] = select i1 [[B]], i32 [[I_I1]], i32 [[J_I1]] | ||
; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> undef, i32 [[RES_I0]], i32 0 | ||
; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 | ||
; CHECK-NEXT: ret <2 x i32> [[RES]] | ||
; | ||
%res = select i1 %b, <2 x i32> %i, <2 x i32> %j | ||
ret <2 x i32> %res | ||
} | ||
|
||
; test2: vector select with a <2 x i1> condition, so the condition is | ||
; scattered as well. The checks pin the per-lane emission order: the extract | ||
; from %b, then %i, then %j, followed by the scalar select for that lane. | ||
define <2 x i32> @test2(<2 x i1> %b, <2 x i32> %i, <2 x i32> %j) { | ||
; CHECK-LABEL: @test2( | ||
; CHECK-NEXT: [[B_I0:%.*]] = extractelement <2 x i1> [[B:%.*]], i32 0 | ||
; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 | ||
; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 | ||
; CHECK-NEXT: [[RES_I0:%.*]] = select i1 [[B_I0]], i32 [[I_I0]], i32 [[J_I0]] | ||
; CHECK-NEXT: [[B_I1:%.*]] = extractelement <2 x i1> [[B]], i32 1 | ||
; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 | ||
; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 | ||
; CHECK-NEXT: [[RES_I1:%.*]] = select i1 [[B_I1]], i32 [[I_I1]], i32 [[J_I1]] | ||
; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> undef, i32 [[RES_I0]], i32 0 | ||
; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 | ||
; CHECK-NEXT: ret <2 x i32> [[RES]] | ||
; | ||
%res = select <2 x i1> %b, <2 x i32> %i, <2 x i32> %j | ||
ret <2 x i32> %res | ||
} | ||
|
||
; test3: binary operator case (add with nuw/nsw flags) on <2 x i32> operands. | ||
; The checks pin the per-lane emission order: the extract from %i, then the | ||
; extract from %j, then the scalar add, which keeps the nuw/nsw flags. | ||
define <2 x i32> @test3(<2 x i32> %i, <2 x i32> %j) { | ||
; CHECK-LABEL: @test3( | ||
; CHECK-NEXT: [[I_I0:%.*]] = extractelement <2 x i32> [[I:%.*]], i32 0 | ||
; CHECK-NEXT: [[J_I0:%.*]] = extractelement <2 x i32> [[J:%.*]], i32 0 | ||
; CHECK-NEXT: [[RES_I0:%.*]] = add nuw nsw i32 [[I_I0]], [[J_I0]] | ||
; CHECK-NEXT: [[I_I1:%.*]] = extractelement <2 x i32> [[I]], i32 1 | ||
; CHECK-NEXT: [[J_I1:%.*]] = extractelement <2 x i32> [[J]], i32 1 | ||
; CHECK-NEXT: [[RES_I1:%.*]] = add nuw nsw i32 [[I_I1]], [[J_I1]] | ||
; CHECK-NEXT: [[RES_UPTO0:%.*]] = insertelement <2 x i32> undef, i32 [[RES_I0]], i32 0 | ||
; CHECK-NEXT: [[RES:%.*]] = insertelement <2 x i32> [[RES_UPTO0]], i32 [[RES_I1]], i32 1 | ||
; CHECK-NEXT: ret <2 x i32> [[RES]] | ||
; | ||
%res = add nuw nsw <2 x i32> %i, %j | ||
ret <2 x i32> %res | ||
} | ||
|
||
; test4: store of a <2 x i32> value through a vector pointer. | ||
; The checks pin the per-lane emission order: the extract from %val, then the | ||
; element pointer (bitcast for lane 0, getelementptr for lane 1), then the | ||
; scalar store. | ||
; NOTE(review): splitting the store presumably depends on the | ||
; -scalarize-load-store flag passed in the RUN lines; confirm. | ||
define void @test4(<2 x i32>* %ptr, <2 x i32> %val) { | ||
; CHECK-LABEL: @test4( | ||
; CHECK-NEXT: [[VAL_I0:%.*]] = extractelement <2 x i32> [[VAL:%.*]], i32 0 | ||
; CHECK-NEXT: [[PTR_I0:%.*]] = bitcast <2 x i32>* [[PTR:%.*]] to i32* | ||
; CHECK-NEXT: store i32 [[VAL_I0]], i32* [[PTR_I0]], align 8 | ||
; CHECK-NEXT: [[VAL_I1:%.*]] = extractelement <2 x i32> [[VAL]], i32 1 | ||
; CHECK-NEXT: [[PTR_I1:%.*]] = getelementptr i32, i32* [[PTR_I0]], i32 1 | ||
; CHECK-NEXT: store i32 [[VAL_I1]], i32* [[PTR_I1]], align 4 | ||
; CHECK-NEXT: ret void | ||
; | ||
store <2 x i32> %val, <2 x i32> *%ptr | ||
ret void | ||
} | ||
|