-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[X86][ByteCode] Allow PSHUFB intrinsics to be used in constexpr #156612 #163148
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8d16fb7
2efb420
ac77cd1
709e316
df3cf14
734ee27
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2790,6 +2790,34 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC, | |
| return true; | ||
| } | ||
|
|
||
| static bool interp__builtin_ia32_pshufb(InterpState &S, CodePtr OpPC, | ||
| const CallExpr *Call) { | ||
| assert(Call->getNumArgs() == 2 && "masked forms handled via select*"); | ||
| const Pointer &Control = S.Stk.pop<Pointer>(); | ||
| const Pointer &Src = S.Stk.pop<Pointer>(); | ||
| const Pointer &Dst = S.Stk.peek<Pointer>(); | ||
|
|
||
| unsigned NumElems = Dst.getNumElems(); | ||
| assert(NumElems == Control.getNumElems()); | ||
| assert(NumElems == Dst.getNumElems()); | ||
|
|
||
| for (unsigned Idx = 0; Idx != NumElems; ++Idx) { | ||
| uint8_t Ctlb = static_cast<uint8_t>(Control.elem<int8_t>(Idx)); | ||
|
|
||
| if (Ctlb & 0x80) { | ||
| Dst.elem<int8_t>(Idx) = 0; | ||
| } else { | ||
| unsigned LaneBase = (Idx / 16) * 16; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't this just a right shift and a left shift? That would seem more readable code, intent wise. Also 16 is a magic number, can we name it. Same applies in the code below. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I can change that ,so LaneBase will then be (Idx >> 4) <<4, then there is no need for naming 16 , and i can also name 0x80 and 0x0F MSBMask and LowNibbleMask for clarity. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure if MSBMask/LowNibbleMask obfuscation is a great idea - keeping reasonably close to the psuedocode in the Intel Intrinsics Guide is probably a better guideline. |
||
| unsigned SrcOffset = Ctlb & 0x0F; | ||
| unsigned SrcIdx = LaneBase + SrcOffset; | ||
|
|
||
| Dst.elem<int8_t>(Idx) = Src.elem<int8_t>(SrcIdx); | ||
| } | ||
| } | ||
| Dst.initializeAllElements(); | ||
| return true; | ||
| } | ||
|
|
||
| static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC, | ||
| const CallExpr *Call, bool IsShufHW) { | ||
| assert(Call->getNumArgs() == 2 && "masked forms handled via select*"); | ||
|
|
@@ -3943,6 +3971,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, | |
| case X86::BI__builtin_ia32_selectpd_512: | ||
| return interp__builtin_select(S, OpPC, Call); | ||
|
|
||
| case X86::BI__builtin_ia32_pshufb128: | ||
| case X86::BI__builtin_ia32_pshufb256: | ||
| case X86::BI__builtin_ia32_pshufb512: | ||
| return interp__builtin_ia32_pshufb(S, OpPC, Call); | ||
|
|
||
| case X86::BI__builtin_ia32_pshuflw: | ||
| case X86::BI__builtin_ia32_pshuflw256: | ||
| case X86::BI__builtin_ia32_pshuflw512: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11619,6 +11619,44 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result, | |
| return true; | ||
| } | ||
|
|
||
| static bool evalPshufbBuiltin(EvalInfo &Info, const CallExpr *Call, | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I initially explored evalPshufBuiltin, but since the function handles the control as a scalar and applies different shuffle logic, integrating the pshufb seemed to require heavy conditional branching which i thought might complicate readability. However, this is my first LLVM contribution and i completely understand if i am missing some broader context. I am happy to rework this or refactor both implementations together if that would be a good long term solution. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have been looking at options for using a callback mechanism for shuffle mask decoding - similar to what we do for binops - I've suggested this to @chaitanyav on #164078 and we can build on that. |
||
| APValue &Out) { | ||
| APValue SrcVec, ControlVec; | ||
| if (!EvaluateAsRValue(Info, Call->getArg(0), SrcVec)) | ||
| return false; | ||
| if (!EvaluateAsRValue(Info, Call->getArg(1), ControlVec)) | ||
| return false; | ||
|
|
||
| const auto *VT = Call->getType()->getAs<VectorType>(); | ||
| if (!VT) | ||
| return false; | ||
|
|
||
| QualType ElemT = VT->getElementType(); | ||
| unsigned NumElts = VT->getNumElements(); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We have assertions in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The definition in BuiltnsX86.td will enforce them, so we can remove them from the InterpBuiltin too. |
||
|
|
||
| SmallVector<APValue, 64> ResultElements; | ||
| ResultElements.reserve(NumElts); | ||
|
|
||
| for (unsigned Idx = 0; Idx != NumElts; ++Idx) { | ||
| APValue CtlVal = ControlVec.getVectorElt(Idx); | ||
| APSInt CtlByte = CtlVal.getInt(); | ||
| uint8_t Ctl = static_cast<uint8_t>(CtlByte.getZExtValue()); | ||
|
|
||
| if (Ctl & 0x80) { | ||
| APValue Zero(Info.Ctx.MakeIntValue(0, ElemT)); | ||
| ResultElements.push_back(Zero); | ||
| } else { | ||
| unsigned LaneBase = (Idx / 16) * 16; | ||
| unsigned SrcOffset = Ctl & 0x0F; | ||
| unsigned SrcIdx = LaneBase + SrcOffset; | ||
|
|
||
| ResultElements.push_back(SrcVec.getVectorElt(SrcIdx)); | ||
| } | ||
| } | ||
| Out = APValue(ResultElements.data(), ResultElements.size()); | ||
| return true; | ||
| } | ||
|
|
||
| static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call, | ||
| bool IsShufHW, APValue &Out) { | ||
| APValue Vec; | ||
|
|
@@ -12241,6 +12279,15 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { | |
| return Success(APValue(ResultElements.data(), ResultElements.size()), E); | ||
| } | ||
|
|
||
| case X86::BI__builtin_ia32_pshufb128: | ||
| case X86::BI__builtin_ia32_pshufb256: | ||
| case X86::BI__builtin_ia32_pshufb512: { | ||
| APValue R; | ||
| if (!evalPshufbBuiltin(Info, E, R)) | ||
| return false; | ||
| return Success(R, E); | ||
| } | ||
|
|
||
| case X86::BI__builtin_ia32_pshuflw: | ||
| case X86::BI__builtin_ia32_pshuflw256: | ||
| case X86::BI__builtin_ia32_pshuflw512: { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are asserting the assignment right before the asserts? Was that what you meant to do?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That was a mistake, it should compare Dst with Src and Contol.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we really need these? The ia32 builtin defs are pretty strict
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not really the asserts can be removed completely.