-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Revert "[AArch64] Improve lowering of truncating uzp1" #85115
Merged
Merged
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This reverts commit 57b991a.
joker-eph
added
the
skip-precommit-approval
PR for CI feedback, not intended for review
label
Mar 13, 2024
@llvm/pr-subscribers-backend-aarch64 Author: Mehdi Amini (joker-eph). Changes: Reverts llvm/llvm-project#82457. The bot is broken, likely because of a mid-air collision. The patch is 40.19 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/85115.diff — 17 files affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9665ae5ceb903f..5b7a36d2eba76f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -21423,8 +21423,12 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
}
}
- // These optimizations only work on little endian.
- if (!DAG.getDataLayout().isLittleEndian())
+ // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y))
+ // Only implemented on little-endian subtargets.
+ bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
+
+ // This optimization only works on little endian.
+ if (!IsLittleEndian)
return SDValue();
// uzp1(bitcast(x), bitcast(y)) -> uzp1(x, y)
@@ -21443,28 +21447,21 @@ static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG,
if (ResVT != MVT::v2i32 && ResVT != MVT::v4i16 && ResVT != MVT::v8i8)
return SDValue();
- SDValue SourceOp0 = peekThroughBitcasts(Op0);
- SDValue SourceOp1 = peekThroughBitcasts(Op1);
+ auto getSourceOp = [](SDValue Operand) -> SDValue {
+ const unsigned Opcode = Operand.getOpcode();
+ if (Opcode == ISD::TRUNCATE)
+ return Operand->getOperand(0);
+ if (Opcode == ISD::BITCAST &&
+ Operand->getOperand(0).getOpcode() == ISD::TRUNCATE)
+ return Operand->getOperand(0)->getOperand(0);
+ return SDValue();
+ };
- // truncating uzp1(x, y) -> xtn(concat (x, y))
- if (SourceOp0.getValueType() == SourceOp1.getValueType()) {
- EVT Op0Ty = SourceOp0.getValueType();
- if ((ResVT == MVT::v4i16 && Op0Ty == MVT::v2i32) ||
- (ResVT == MVT::v8i8 && Op0Ty == MVT::v4i16)) {
- SDValue Concat =
- DAG.getNode(ISD::CONCAT_VECTORS, DL,
- Op0Ty.getDoubleNumVectorElementsVT(*DAG.getContext()),
- SourceOp0, SourceOp1);
- return DAG.getNode(ISD::TRUNCATE, DL, ResVT, Concat);
- }
- }
+ SDValue SourceOp0 = getSourceOp(Op0);
+ SDValue SourceOp1 = getSourceOp(Op1);
- // uzp1(xtn x, xtn y) -> xtn(uzp1 (x, y))
- if (SourceOp0.getOpcode() != ISD::TRUNCATE ||
- SourceOp1.getOpcode() != ISD::TRUNCATE)
+ if (!SourceOp0 || !SourceOp1)
return SDValue();
- SourceOp0 = SourceOp0.getOperand(0);
- SourceOp1 = SourceOp1.getOperand(0);
if (SourceOp0.getValueType() != SourceOp1.getValueType() ||
!SourceOp0.getValueType().isSimple())
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index b4b975cce007ac..6254e68326f79d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6153,39 +6153,26 @@ defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
-def trunc_optional_assert_ext : PatFrags<(ops node:$op0),
- [(trunc node:$op0),
- (assertzext (trunc node:$op0)),
- (assertsext (trunc node:$op0))]>;
-
-// concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y)
-// concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y)
-// concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y)
-class concat_trunc_to_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy>
- : Pat<(ConcatTy (concat_vectors (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
- (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))),
- (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm)>;
-def : concat_trunc_to_uzp1_pat<v8i16, v8i8, v16i8>;
-def : concat_trunc_to_uzp1_pat<v4i32, v4i16, v8i16>;
-def : concat_trunc_to_uzp1_pat<v2i64, v2i32, v4i32>;
-
-// trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y))
-// trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y))
-// trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y))
-class trunc_concat_trunc_to_xtn_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy,
- ValueType Ty>
- : Pat<(Ty (trunc_optional_assert_ext
- (ConcatTy (concat_vectors
- (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
- (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))))),
- (!cast<Instruction>("XTN"#Ty) (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm))>;
-def : trunc_concat_trunc_to_xtn_uzp1_pat<v4i32, v4i16, v8i16, v8i8>;
-def : trunc_concat_trunc_to_xtn_uzp1_pat<v2i64, v2i32, v4i32, v4i16>;
-
-def : Pat<(v8i8 (trunc (concat_vectors (v4i16 V64:$Vn), (v4i16 V64:$Vm)))),
- (UZP1v8i8 V64:$Vn, V64:$Vm)>;
-def : Pat<(v4i16 (trunc (concat_vectors (v2i32 V64:$Vn), (v2i32 V64:$Vm)))),
- (UZP1v4i16 V64:$Vn, V64:$Vm)>;
+def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
+ (v8i8 (trunc (v8i16 V128:$Vm))))),
+ (UZP1v16i8 V128:$Vn, V128:$Vm)>;
+def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
+ (v4i16 (trunc (v4i32 V128:$Vm))))),
+ (UZP1v8i16 V128:$Vn, V128:$Vm)>;
+def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
+ (v2i32 (trunc (v2i64 V128:$Vm))))),
+ (UZP1v4i32 V128:$Vn, V128:$Vm)>;
+// These are the same as above, with an optional assertzext node that can be
+// generated from fptoi lowering.
+def : Pat<(v16i8 (concat_vectors (v8i8 (assertzext (trunc (v8i16 V128:$Vn)))),
+ (v8i8 (assertzext (trunc (v8i16 V128:$Vm)))))),
+ (UZP1v16i8 V128:$Vn, V128:$Vm)>;
+def : Pat<(v8i16 (concat_vectors (v4i16 (assertzext (trunc (v4i32 V128:$Vn)))),
+ (v4i16 (assertzext (trunc (v4i32 V128:$Vm)))))),
+ (UZP1v8i16 V128:$Vn, V128:$Vm)>;
+def : Pat<(v4i32 (concat_vectors (v2i32 (assertzext (trunc (v2i64 V128:$Vn)))),
+ (v2i32 (assertzext (trunc (v2i64 V128:$Vm)))))),
+ (UZP1v4i32 V128:$Vn, V128:$Vm)>;
def : Pat<(v16i8 (concat_vectors
(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
diff --git a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
index 3007e7ce771e62..49325299f74a12 100644
--- a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll
@@ -8,8 +8,9 @@ define <4 x i16> @fptosi_v4f64_to_v4i16(ptr %ptr) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: xtn v1.2s, v1.2d
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
%tmp1 = load <4 x double>, ptr %ptr
%tmp2 = fptosi <4 x double> %tmp1 to <4 x i16>
@@ -25,10 +26,13 @@ define <8 x i8> @fptosi_v4f64_to_v4i8(ptr %ptr) {
; CHECK-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-NEXT: fcvtzs v3.2d, v3.2d
; CHECK-NEXT: fcvtzs v2.2d, v2.2d
-; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: uzp1 v1.4s, v2.4s, v3.4s
-; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
-; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: xtn v1.2s, v1.2d
+; CHECK-NEXT: xtn v3.2s, v3.2d
+; CHECK-NEXT: xtn v2.2s, v2.2d
+; CHECK-NEXT: uzp1 v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: uzp1 v1.4h, v2.4h, v3.4h
+; CHECK-NEXT: uzp1 v0.8b, v1.8b, v0.8b
; CHECK-NEXT: ret
%tmp1 = load <8 x double>, ptr %ptr
%tmp2 = fptosi <8 x double> %tmp1 to <8 x i8>
@@ -92,8 +96,9 @@ define <4 x i16> @fptoui_v4f64_to_v4i16(ptr %ptr) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: xtn v1.2s, v1.2d
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
%tmp1 = load <4 x double>, ptr %ptr
%tmp2 = fptoui <4 x double> %tmp1 to <4 x i16>
diff --git a/llvm/test/CodeGen/AArch64/extbinopload.ll b/llvm/test/CodeGen/AArch64/extbinopload.ll
index dff4831330deb0..1f68c77611e10d 100644
--- a/llvm/test/CodeGen/AArch64/extbinopload.ll
+++ b/llvm/test/CodeGen/AArch64/extbinopload.ll
@@ -650,7 +650,7 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
; CHECK-NEXT: add x11, x3, #12
; CHECK-NEXT: str s1, [x4]
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-NEXT: ldp s0, s4, [x2]
+; CHECK-NEXT: ldp s0, s5, [x2]
; CHECK-NEXT: ushll v2.8h, v0.8b, #0
; CHECK-NEXT: umov w9, v2.h[0]
; CHECK-NEXT: umov w10, v2.h[1]
@@ -662,25 +662,24 @@ define <16 x i32> @extrause_load(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) {
; CHECK-NEXT: ushll v2.8h, v2.8b, #0
; CHECK-NEXT: mov v0.b[10], w9
; CHECK-NEXT: add x9, x1, #4
-; CHECK-NEXT: mov v1.d[1], v2.d[0]
+; CHECK-NEXT: uzp1 v1.8b, v1.8b, v2.8b
; CHECK-NEXT: mov v0.b[11], w10
; CHECK-NEXT: add x10, x1, #12
-; CHECK-NEXT: bic v1.8h, #255, lsl #8
; CHECK-NEXT: ld1 { v0.s }[3], [x3], #4
-; CHECK-NEXT: ldr s3, [x0, #12]
-; CHECK-NEXT: ldp s2, s7, [x0, #4]
-; CHECK-NEXT: ld1 { v4.s }[1], [x3]
-; CHECK-NEXT: ldp s5, s6, [x2, #8]
-; CHECK-NEXT: ld1 { v3.s }[1], [x10]
-; CHECK-NEXT: ld1 { v2.s }[1], [x9]
-; CHECK-NEXT: ld1 { v5.s }[1], [x8]
-; CHECK-NEXT: ld1 { v6.s }[1], [x11]
+; CHECK-NEXT: ldr s4, [x0, #12]
+; CHECK-NEXT: ldp s3, s16, [x0, #4]
+; CHECK-NEXT: ld1 { v5.s }[1], [x3]
+; CHECK-NEXT: ldp s6, s7, [x2, #8]
+; CHECK-NEXT: ld1 { v4.s }[1], [x10]
+; CHECK-NEXT: ld1 { v3.s }[1], [x9]
+; CHECK-NEXT: ld1 { v6.s }[1], [x8]
+; CHECK-NEXT: ld1 { v7.s }[1], [x11]
; CHECK-NEXT: add x8, x1, #8
-; CHECK-NEXT: ld1 { v7.s }[1], [x8]
-; CHECK-NEXT: uaddl v2.8h, v2.8b, v3.8b
-; CHECK-NEXT: ushll v3.8h, v5.8b, #0
-; CHECK-NEXT: uaddl v4.8h, v4.8b, v6.8b
-; CHECK-NEXT: uaddw v1.8h, v1.8h, v7.8b
+; CHECK-NEXT: ld1 { v16.s }[1], [x8]
+; CHECK-NEXT: uaddl v2.8h, v3.8b, v4.8b
+; CHECK-NEXT: ushll v3.8h, v6.8b, #0
+; CHECK-NEXT: uaddl v4.8h, v5.8b, v7.8b
+; CHECK-NEXT: uaddl v1.8h, v1.8b, v16.8b
; CHECK-NEXT: uaddw2 v5.8h, v3.8h, v0.16b
; CHECK-NEXT: ushll v0.4s, v2.4h, #3
; CHECK-NEXT: ushll2 v2.4s, v2.8h, #3
diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
index 0a3b9a070c2b32..1ea87bb6b04b51 100644
--- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll
@@ -73,8 +73,9 @@ define void @fptoui_v8f32_to_v8i8_no_loop(ptr %A, ptr %dst) {
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: fcvtzs.4s v1, v1
; CHECK-NEXT: fcvtzs.4s v0, v0
-; CHECK-NEXT: uzp1.8h v0, v0, v1
-; CHECK-NEXT: xtn.8b v0, v0
+; CHECK-NEXT: xtn.4h v1, v1
+; CHECK-NEXT: xtn.4h v0, v0
+; CHECK-NEXT: uzp1.8b v0, v0, v1
; CHECK-NEXT: str d0, [x1]
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 7af01b53dae7e8..67190e8596c46c 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -1096,17 +1096,30 @@ entry:
}
define <3 x i16> @fptos_v3f64_v3i16(<3 x double> %a) {
-; CHECK-LABEL: fptos_v3f64_v3i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: fcvtzs v1.2d, v2.2d
-; CHECK-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: fptos_v3f64_v3i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-SD-NEXT: fcvtzs v1.2d, v2.2d
+; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-SD-NEXT: xtn v1.2s, v1.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: fptos_v3f64_v3i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: fcvtzs v1.2d, v2.2d
+; CHECK-GI-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-GI-NEXT: uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: ret
entry:
%c = fptosi <3 x double> %a to <3 x i16>
ret <3 x i16> %c
@@ -1121,8 +1134,9 @@ define <3 x i16> @fptou_v3f64_v3i16(<3 x double> %a) {
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: fcvtzs v1.2d, v2.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: xtn v1.2s, v1.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptou_v3f64_v3i16:
@@ -1146,8 +1160,9 @@ define <4 x i16> @fptos_v4f64_v4i16(<4 x double> %a) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: xtn v1.2s, v1.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptos_v4f64_v4i16:
@@ -1167,8 +1182,9 @@ define <4 x i16> @fptou_v4f64_v4i16(<4 x double> %a) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: xtn v1.2s, v1.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptou_v4f64_v4i16:
@@ -1584,8 +1600,9 @@ define <3 x i8> @fptos_v3f64_v3i8(<3 x double> %a) {
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: fcvtzs v1.2d, v2.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: xtn v1.2s, v1.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: umov w0, v0.h[0]
; CHECK-SD-NEXT: umov w1, v0.h[1]
; CHECK-SD-NEXT: umov w2, v0.h[2]
@@ -1621,8 +1638,9 @@ define <3 x i8> @fptou_v3f64_v3i8(<3 x double> %a) {
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: fcvtzs v1.2d, v2.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: xtn v1.2s, v1.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: umov w0, v0.h[0]
; CHECK-SD-NEXT: umov w1, v0.h[1]
; CHECK-SD-NEXT: umov w2, v0.h[2]
@@ -1654,8 +1672,9 @@ define <4 x i8> @fptos_v4f64_v4i8(<4 x double> %a) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: xtn v1.2s, v1.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptos_v4f64_v4i8:
@@ -1675,8 +1694,9 @@ define <4 x i8> @fptou_v4f64_v4i8(<4 x double> %a) {
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: xtn v0.4h, v0.4s
+; CHECK-SD-NEXT: xtn v1.2s, v1.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptou_v4f64_v4i8:
@@ -1698,10 +1718,13 @@ define <8 x i8> @fptos_v8f64_v8i8(<8 x double> %a) {
; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d
; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s
-; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-SD-NEXT: xtn v3.2s, v3.2d
+; CHECK-SD-NEXT: xtn v2.2s, v2.2d
+; CHECK-SD-NEXT: xtn v1.2s, v1.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v2.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptos_v8f64_v8i8:
@@ -1727,10 +1750,13 @@ define <8 x i8> @fptou_v8f64_v8i8(<8 x double> %a) {
; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d
; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s
-; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: xtn v0.8b, v0.8h
+; CHECK-SD-NEXT: xtn v3.2s, v3.2d
+; CHECK-SD-NEXT: xtn v2.2s, v2.2d
+; CHECK-SD-NEXT: xtn v1.2s, v1.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v2.8b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptou_v8f64_v8i8:
@@ -1760,13 +1786,21 @@ define <16 x i8> @fptos_v16f64_v16i8(<16 x double> %a) {
; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d
; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: uzp1 v6.4s, v6.4s, v7.4s
-; CHECK-SD-NEXT: uzp1 v4.4s, v4.4s, v5.4s
-; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s
-; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: uzp1 v1.8h, v4.8h, v6.8h
-; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: xtn v7.2s, v7.2d
+; CHECK-SD-NEXT: xtn v6.2s, v6.2d
+; CHECK-SD-NEXT: xtn v5.2s, v5.2d
+; CHECK-SD-NEXT: xtn v4.2s, v4.2d
+; CHECK-SD-NEXT: xtn v3.2s, v3.2d
+; CHECK-SD-NEXT: xtn v2.2s, v2.2d
+; CHECK-SD-NEXT: xtn v1.2s, v1.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: uzp1 v6.4h, v6.4h, v7.4h
+; CHECK-SD-NEXT: uzp1 v4.4h, v4.4h, v5.4h
+; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: mov v4.d[1], v6.d[0]
+; CHECK-SD-NEXT: mov v0.d[1], v2.d[0]
+; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v4.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptos_v16f64_v16i8:
@@ -1803,13 +1837,21 @@ define <16 x i8> @fptou_v16f64_v16i8(<16 x double> %a) {
; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d
; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d
; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d
-; CHECK-SD-NEXT: uzp1 v6.4s, v6.4s, v7.4s
-; CHECK-SD-NEXT: uzp1 v4.4s, v4.4s, v5.4s
-; CHECK-SD-NEXT: uzp1 v2.4s, v2.4s, v3.4s
-; CHECK-SD-NEXT: uzp1 v0.4s, v0.4s, v1.4s
-; CHECK-SD-NEXT: uzp1 v1.8h, v4.8h, v6.8h
-; CHECK-SD-NEXT: uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: xtn v7.2s, v7.2d
+; CHECK-SD-NEXT: xtn v6.2s, v6.2d
+; CHECK-SD-NEXT: xtn v5.2s, v5.2d
+; CHECK-SD-NEXT: xtn v4.2s, v4.2d
+; CHECK-SD-NEXT: xtn v3.2s, v3.2d
+; CHECK-SD-NEXT: xtn v2.2s, v2.2d
+; CHECK-SD-NEXT: xtn v1.2s, v1.2d
+; CHECK-SD-NEXT: xtn v0.2s, v0.2d
+; CHECK-SD-NEXT: uzp1 v6.4h, v6.4h, v7.4h
+; CHECK-SD-NEXT: uzp1 v4.4h, v4.4h, v5.4h
+; CHECK-SD-NEXT: uzp1 v2.4h, v2.4h, v3.4h
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: mov v4.d[1], v6.d[0]
+; CHECK-SD-NEXT: mov v0.d[1], v2.d[0]
+; CHECK-SD-NEXT: uzp1 v0.16b, v0.16b, v4.16b
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: fptou_v16f64_v16i8:
@@ -1858,20 +1900,36 @@ define <32 x i8> @...
[truncated]
|
I fixed the tests already in 79cd2c0, so a revert shouldn't be needed.
UsmanNadeem
added a commit
that referenced
this pull request
Mar 13, 2024
This reverts commit 06e310f.
UsmanNadeem
added a commit
that referenced
this pull request
Mar 13, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Labels
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Reverts #82457
The bot is broken, likely because of a mid-air collision.