-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86] Add constexpr support for addsub and select intrinsics #167512
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -4279,6 +4279,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, | |||||
| F.subtract(RHS, RM); | ||||||
| return F; | ||||||
| }); | ||||||
| case clang::X86::BI__builtin_ia32_addsubpd: | ||||||
| case clang::X86::BI__builtin_ia32_addsubps: | ||||||
| case clang::X86::BI__builtin_ia32_addsubpd256: | ||||||
| case clang::X86::BI__builtin_ia32_addsubps256: { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Use a static function like everything else. |
||||||
| // Addsub: alternates between subtraction and addition | ||||||
| // Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i]) | ||||||
| const Pointer &RHS = S.Stk.pop<Pointer>(); | ||||||
| const Pointer &LHS = S.Stk.pop<Pointer>(); | ||||||
| const Pointer &Dst = S.Stk.peek<Pointer>(); | ||||||
| FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts()); | ||||||
| llvm::RoundingMode RM = getRoundingMode(FPO); | ||||||
| const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>(); | ||||||
| unsigned NumElts = VT->getNumElements(); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
|
||||||
| using T = PrimConv<PT_Float>::T; | ||||||
| for (unsigned I = 0; I < NumElts; ++I) { | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| APFloat LElem = LHS.elem<T>(I).getAPFloat(); | ||||||
| APFloat RElem = RHS.elem<T>(I).getAPFloat(); | ||||||
| if (I % 2 == 0) { | ||||||
| // Even indices: subtract | ||||||
| LElem.subtract(RElem, RM); | ||||||
| } else { | ||||||
| // Odd indices: add | ||||||
| LElem.add(RElem, RM); | ||||||
| } | ||||||
| Dst.elem<T>(I) = static_cast<T>(LElem); | ||||||
| } | ||||||
| Dst.initializeAllElements(); | ||||||
| return true; | ||||||
| } | ||||||
|
|
||||||
| case clang::X86::BI__builtin_ia32_pmuldq128: | ||||||
| case clang::X86::BI__builtin_ia32_pmuldq256: | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8383,24 +8383,30 @@ _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { | |
| (__v16sf)_mm512_setzero_ps()); | ||
| } | ||
|
|
||
| static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR | ||
| _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { | ||
| static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, | ||
| __mmask8 __U, | ||
| __m128 __A, | ||
| __m128 __B) { | ||
| return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W); | ||
| } | ||
|
|
||
| static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR | ||
| _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) { | ||
| static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, | ||
| __m128 __A, | ||
| __m128 __B) { | ||
| return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), | ||
| _mm_setzero_ps()); | ||
| } | ||
|
|
||
| static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR | ||
| _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { | ||
| static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, | ||
| __mmask8 __U, | ||
| __m128d __A, | ||
| __m128d __B) { | ||
| return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W); | ||
| } | ||
|
|
||
| static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR | ||
| _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) { | ||
| static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, | ||
| __m128d __A, | ||
| __m128d __B) { | ||
| return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Pull this out into its own PR. |
||
| _mm_setzero_pd()); | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This also affects the max/min methods. You need to split off the addsub builtins before you make them constexpr (maybe add them further down, in the block with movmskpd256 etc.?).