-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AArch64] Extend int-to-fp load optimization to support f16 #168076
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-aarch64 Author: Guy David (guy-david) Changes: Full diff: https://github.com/llvm/llvm-project/pull/168076.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f5f732da99349..2325363bc2e0d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19988,7 +19988,9 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
return Res;
EVT VT = N->getValueType(0);
- if (VT != MVT::f32 && VT != MVT::f64)
+ if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64)
+ return SDValue();
+ if (VT == MVT::f16 && !Subtarget->hasFullFP16())
return SDValue();
// Only optimize when the source and destination types have the same width.
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 84f0d47c02bad..7297ffc80d3a8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -7014,6 +7014,19 @@ multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
sub))>;
}
+let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
+defm : UIntToFPROLoadPat<f16, i32, zextloadi8,
+ UCVTFv1i16, ro8, LDRBroW, LDRBroX, bsub>;
+def : Pat <(f16 (uint_to_fp (i32
+ (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (UCVTFv1i16 (INSERT_SUBREG (f16 (IMPLICIT_DEF)),
+ (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
+def : Pat <(f16 (uint_to_fp (i32
+ (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
+ (UCVTFv1i16 (INSERT_SUBREG (f16 (IMPLICIT_DEF)),
+ (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
+}
+
defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
diff --git a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
index 478ccf58f32c5..56e4b1988b8d1 100644
--- a/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
+++ b/llvm/test/CodeGen/AArch64/int-to-fp-no-neon.ll
@@ -43,11 +43,17 @@ entry:
}
define half @ui8_to_half(ptr %i, ptr %f) {
-; CHECK-LABEL: ui8_to_half:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: ucvtf h0, w8
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: ui8_to_half:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr b0, [x0]
+; NEON-ENABLED-NEXT: ucvtf h0, h0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: ui8_to_half:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrb w8, [x0]
+; NEON-DISABLED-NEXT: ucvtf h0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i8, ptr %i, align 1
%conv = uitofp i8 %ld to half
@@ -91,11 +97,17 @@ entry:
}
define half @ui16_to_half(ptr %i, ptr %f) {
-; CHECK-LABEL: ui16_to_half:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: ucvtf h0, w8
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: ui16_to_half:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr h0, [x0]
+; NEON-ENABLED-NEXT: ucvtf h0, h0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: ui16_to_half:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrh w8, [x0]
+; NEON-DISABLED-NEXT: ucvtf h0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i16, ptr %i, align 1
%conv = uitofp i16 %ld to half
@@ -277,11 +289,17 @@ entry:
}
define half @si16_to_half(ptr %i, ptr %f) {
-; CHECK-LABEL: si16_to_half:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrsh w8, [x0]
-; CHECK-NEXT: scvtf h0, w8
-; CHECK-NEXT: ret
+; NEON-ENABLED-LABEL: si16_to_half:
+; NEON-ENABLED: // %bb.0: // %entry
+; NEON-ENABLED-NEXT: ldr h0, [x0]
+; NEON-ENABLED-NEXT: scvtf h0, h0
+; NEON-ENABLED-NEXT: ret
+;
+; NEON-DISABLED-LABEL: si16_to_half:
+; NEON-DISABLED: // %bb.0: // %entry
+; NEON-DISABLED-NEXT: ldrsh w8, [x0]
+; NEON-DISABLED-NEXT: scvtf h0, w8
+; NEON-DISABLED-NEXT: ret
entry:
%ld = load i16, ptr %i, align 1
%conv = sitofp i16 %ld to half
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index caf87a13f283b..56dd08feb2e15 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -1090,11 +1090,17 @@ define half @stofp_load_i16_f16(ptr %p) {
; CHECK-NOFP16-NEXT: fcvt h0, s0
; CHECK-NOFP16-NEXT: ret
;
-; CHECK-FP16-LABEL: stofp_load_i16_f16:
-; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: ldrsh w8, [x0]
-; CHECK-FP16-NEXT: scvtf h0, w8
-; CHECK-FP16-NEXT: ret
+; CHECK-FP16-SD-LABEL: stofp_load_i16_f16:
+; CHECK-FP16-SD: // %bb.0: // %entry
+; CHECK-FP16-SD-NEXT: ldr h0, [x0]
+; CHECK-FP16-SD-NEXT: scvtf h0, h0
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: stofp_load_i16_f16:
+; CHECK-FP16-GI: // %bb.0: // %entry
+; CHECK-FP16-GI-NEXT: ldrsh w8, [x0]
+; CHECK-FP16-GI-NEXT: scvtf h0, w8
+; CHECK-FP16-GI-NEXT: ret
entry:
%a = load i16, ptr %p
%c = sitofp i16 %a to half
@@ -1109,11 +1115,17 @@ define half @utofp_load_i16_f16(ptr %p) {
; CHECK-NOFP16-NEXT: fcvt h0, s0
; CHECK-NOFP16-NEXT: ret
;
-; CHECK-FP16-LABEL: utofp_load_i16_f16:
-; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: ldrh w8, [x0]
-; CHECK-FP16-NEXT: ucvtf h0, w8
-; CHECK-FP16-NEXT: ret
+; CHECK-FP16-SD-LABEL: utofp_load_i16_f16:
+; CHECK-FP16-SD: // %bb.0: // %entry
+; CHECK-FP16-SD-NEXT: ldr h0, [x0]
+; CHECK-FP16-SD-NEXT: ucvtf h0, h0
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: utofp_load_i16_f16:
+; CHECK-FP16-GI: // %bb.0: // %entry
+; CHECK-FP16-GI-NEXT: ldrh w8, [x0]
+; CHECK-FP16-GI-NEXT: ucvtf h0, w8
+; CHECK-FP16-GI-NEXT: ret
entry:
%a = load i16, ptr %p
%c = uitofp i16 %a to half
@@ -1149,8 +1161,8 @@ define half @utofp_load_i8_f16(ptr %p) {
;
; CHECK-FP16-LABEL: utofp_load_i8_f16:
; CHECK-FP16: // %bb.0: // %entry
-; CHECK-FP16-NEXT: ldrb w8, [x0]
-; CHECK-FP16-NEXT: ucvtf h0, w8
+; CHECK-FP16-NEXT: ldr b0, [x0]
+; CHECK-FP16-NEXT: ucvtf h0, h0
; CHECK-FP16-NEXT: ret
entry:
%a = load i8, ptr %p
|
davemgreen
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure we should have two ways of doing this - I think the bitcast in DAG might be able to handle both of them now.
With things as we have them, this LGTM though. Thanks
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: <https://lab.llvm.org/buildbot/#/builders/116/builds/21332>. Here is the relevant piece of the build log for reference. |
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: <https://lab.llvm.org/buildbot/#/builders/110/builds/6459>. Here is the relevant piece of the build log for reference. |
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: <https://lab.llvm.org/buildbot/#/builders/95/builds/19198>. Here is the relevant piece of the build log for reference. |
No description provided.