-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86] Stop custom-widening v2f32 = fpext v2bf16 #80106
Conversation
d32872e
to
3a4a336
Compare
@llvm/pr-subscribers-backend-x86 Author: None (yubingex007-a11y). Changes: Full diff: https://github.com/llvm/llvm-project/pull/80106.diff (2 Files Affected):
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 531e00862558c..82753a74ba0dd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32815,10 +32815,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
// No other ValueType for FP_EXTEND should reach this point.
assert(N->getValueType(0) == MVT::v2f32 &&
"Do not know how to legalize this Node");
- if (!Subtarget.hasFP16() || !Subtarget.hasVLX())
- return;
bool IsStrict = N->isStrictFPOpcode();
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
+ if (!Subtarget.hasFP16() || !Subtarget.hasVLX() ||
+ Src.getValueType().getVectorElementType() != MVT::f16)
+ return;
SDValue Ext = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v2f16)
: DAG.getUNDEF(MVT::v2f16);
SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f16, Src, Ext);
diff --git a/llvm/test/CodeGen/X86/concat-fpext-v2bf16.ll b/llvm/test/CodeGen/X86/concat-fpext-v2bf16.ll
new file mode 100644
index 0000000000000..d56bb62d22f06
--- /dev/null
+++ b/llvm/test/CodeGen/X86/concat-fpext-v2bf16.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512bf16,avx512fp16 | FileCheck %s
+define dso_local void @calc_matrix_tdpbf16ps(<2 x ptr> %ptr) local_unnamed_addr #0 {
+; CHECK-LABEL: calc_matrix_tdpbf16ps:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je .LBB0_1
+; CHECK-NEXT: # %bb.2: # %loop.127.preheader
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB0_1: # %ifmerge.89
+; CHECK-NEXT: movzwl (%rax), %eax
+; CHECK-NEXT: shll $16, %eax
+; CHECK-NEXT: vmovd %eax, %xmm0
+; CHECK-NEXT: vmulss %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
+; CHECK-NEXT: vmovlps %xmm0, (%rax)
+entry:
+ br label %then.13
+
+then.13: ; preds = %entry
+ %0 = fpext bfloat poison to float
+ br i1 poison, label %loop.127.preheader, label %ifmerge.89
+
+ifmerge.89: ; preds = %then.13
+ %.splatinsert144 = insertelement <2 x float> poison, float %0, i64 0
+ %.splat145 = shufflevector <2 x float> %.splatinsert144, <2 x float> poison, <2 x i32> zeroinitializer
+ %1 = tail call <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr> undef, i32 2, <2 x i1> <i1 true, i1 true>, <2 x bfloat> poison)
+ %2 = fpext <2 x bfloat> %1 to <2 x float>
+ %3 = fmul fast <2 x float> %.splat145, %2
+ %4 = fadd fast <2 x float> zeroinitializer, %3
+ store <2 x float> %4, ptr poison, align 4
+ unreachable
+
+loop.127.preheader: ; preds = %then.13
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
+declare <2 x bfloat> @llvm.masked.gather.v2bf16.v2p0(<2 x ptr>, i32 immarg, <2 x i1>, <2 x bfloat>) #1
+
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with some nits.
if (!Subtarget.hasFP16() || !Subtarget.hasVLX() || | ||
Src.getValueType().getVectorElementType() != MVT::f16) | ||
return; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's better to just add
if (Src.getValueType().getVectorElementType() != MVT::f16)
return;
@@ -0,0 +1,41 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | |||
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512bf16,avx512fp16 | FileCheck %s | |||
define dso_local void @calc_matrix_tdpbf16ps(<2 x ptr> %ptr) local_unnamed_addr #0 { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a blank line and remove `dso_local` and `local_unnamed_addr #0`.
3a4a336
to
e52d3bd
Compare
if (!Subtarget.hasFP16() || !Subtarget.hasVLX()) | ||
return; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I mean these lines should not be changed.
bc7b440
to
8de8969
Compare
No description provided.