Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AArch64][GlobalISel] Legalize Shifts for Smaller/Larger Vectors #78750

Merged
merged 2 commits into from
Jan 22, 2024

Conversation

chuongg3
Copy link
Contributor

Legalize shl/lshr/ashr for smaller/larger vector widths with legal element sizes

Smaller-than-legal vector types do not work at the moment, because legalizing them relies on G_ANYEXT supporting smaller-than-legal vector types.

@llvmbot
Copy link
Collaborator

llvmbot commented Jan 19, 2024

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-aarch64

Author: None (chuongg3)

Changes

Legalize shl/lshr/ashr for smaller/larger vector widths with legal element sizes

Smaller-than-legal vector types do not work at the moment, because legalizing them relies on G_ANYEXT supporting smaller-than-legal vector types.


Patch is 51.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/78750.diff

5 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+4-1)
  • (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+2)
  • (modified) llvm/test/CodeGen/AArch64/fcmp.ll (+174-129)
  • (modified) llvm/test/CodeGen/AArch64/sext.ll (+50-21)
  • (added) llvm/test/CodeGen/AArch64/shift.ll (+1091)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 91d2497fdb7e208..994ee6ebc94345f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5198,7 +5198,10 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   case TargetOpcode::G_FMAXIMUM:
   case TargetOpcode::G_STRICT_FADD:
   case TargetOpcode::G_STRICT_FSUB:
-  case TargetOpcode::G_STRICT_FMUL: {
+  case TargetOpcode::G_STRICT_FMUL:
+  case TargetOpcode::G_SHL:
+  case TargetOpcode::G_ASHR:
+  case TargetOpcode::G_LSHR: {
     Observer.changingInstr(MI);
     moreElementsVectorSrc(MI, MoreTy, 1);
     moreElementsVectorSrc(MI, MoreTy, 2);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e94f9d0c68ffe78..ea3b215cf5b4828 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -169,6 +169,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
       .widenScalarToNextPow2(0)
       .clampScalar(1, s32, s64)
       .clampScalar(0, s32, s64)
+      .clampNumElements(0, v8s8, v16s8)
+      .clampNumElements(0, v4s16, v8s16)
       .clampNumElements(0, v2s32, v4s32)
       .clampNumElements(0, v2s64, v2s64)
       .moreElementsToNextPow2(0)
diff --git a/llvm/test/CodeGen/AArch64/fcmp.ll b/llvm/test/CodeGen/AArch64/fcmp.ll
index 82e29d0f8a194f1..d3bc7fc6dc0634b 100644
--- a/llvm/test/CodeGen/AArch64/fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/fcmp.ll
@@ -8,9 +8,9 @@
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v3f64_i32
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v3f32_float
 ; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v3f32_i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v7f16_half
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for v7f16_half
 ; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for v16f16_half
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for v7f16_i32
+; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for v7f16_i32
 ; CHECK-GI-FP16-NEXT:  warning: Instruction selection used fallback path for v16f16_i32
 
 define double @f64_double(double %a, double %b, double %d, double %e) {
@@ -437,62 +437,87 @@ define <7 x half> @v7f16_half(<7 x half> %a, <7 x half> %b, <7 x half> %d, <7 x
 ;
 ; CHECK-GI-NOFP16-LABEL: v7f16_half:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h0
-; CHECK-GI-NOFP16-NEXT:    mov h16, v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h5, v1.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov w9, #15 // =0xf
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h0
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h1
+; CHECK-GI-NOFP16-NEXT:    mov h16, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h17, v1.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h19, v1.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov h20, v1.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcvt s18, h4
 ; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fcmp s5, s4
-; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h5, v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    csetm w8, mi
-; CHECK-GI-NOFP16-NEXT:    fcmp s7, s6
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcmp s6, s7
 ; CHECK-GI-NOFP16-NEXT:    fcvt s7, h16
-; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
-; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    mov h16, v0.h[4]
-; CHECK-GI-NOFP16-NEXT:    csetm w9, mi
-; CHECK-GI-NOFP16-NEXT:    fcmp s4, s7
-; CHECK-GI-NOFP16-NEXT:    fmov s4, w9
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[4]
-; CHECK-GI-NOFP16-NEXT:    fcvt s16, h16
-; CHECK-GI-NOFP16-NEXT:    mov v4.h[1], w8
-; CHECK-GI-NOFP16-NEXT:    csetm w8, mi
-; CHECK-GI-NOFP16-NEXT:    fcmp s6, s5
-; CHECK-GI-NOFP16-NEXT:    mov h5, v1.h[5]
-; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[5]
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT:    mov v4.h[2], w8
-; CHECK-GI-NOFP16-NEXT:    csetm w8, mi
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT:    fcmp s16, s7
-; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov h16, v0.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
-; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT:    mov v4.h[3], w8
-; CHECK-GI-NOFP16-NEXT:    csetm w8, mi
-; CHECK-GI-NOFP16-NEXT:    fcmp s6, s5
-; CHECK-GI-NOFP16-NEXT:    fcvt s5, h7
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h16
+; CHECK-GI-NOFP16-NEXT:    fmov s6, w9
+; CHECK-GI-NOFP16-NEXT:    fcvt s16, h17
+; CHECK-GI-NOFP16-NEXT:    mov h17, v1.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s19, h19
+; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[6]
+; CHECK-GI-NOFP16-NEXT:    cset w8, mi
+; CHECK-GI-NOFP16-NEXT:    fcmp s18, s5
+; CHECK-GI-NOFP16-NEXT:    mov h18, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    fmov s5, w8
+; CHECK-GI-NOFP16-NEXT:    cset w9, mi
+; CHECK-GI-NOFP16-NEXT:    fcmp s7, s16
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h4
+; CHECK-GI-NOFP16-NEXT:    fcvt s16, h17
+; CHECK-GI-NOFP16-NEXT:    fmov s17, w9
+; CHECK-GI-NOFP16-NEXT:    mov v4.16b, v6.16b
+; CHECK-GI-NOFP16-NEXT:    fcvt s18, h18
 ; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    cset w8, mi
+; CHECK-GI-NOFP16-NEXT:    mov v5.h[1], v17.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[1], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov h17, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    fcmp s7, s16
+; CHECK-GI-NOFP16-NEXT:    fmov s7, w8
+; CHECK-GI-NOFP16-NEXT:    mov w8, #65535 // =0xffff
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[6]
+; CHECK-GI-NOFP16-NEXT:    mov v5.h[2], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    fmov s7, w8
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[2], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    cset w8, mi
+; CHECK-GI-NOFP16-NEXT:    fcmp s18, s19
+; CHECK-GI-NOFP16-NEXT:    fcvt s17, h17
+; CHECK-GI-NOFP16-NEXT:    fcvt s18, h20
+; CHECK-GI-NOFP16-NEXT:    fmov s19, w8
 ; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    mov v4.h[4], w8
-; CHECK-GI-NOFP16-NEXT:    csetm w8, mi
-; CHECK-GI-NOFP16-NEXT:    fcmp s6, s5
-; CHECK-GI-NOFP16-NEXT:    mov v4.h[5], w8
-; CHECK-GI-NOFP16-NEXT:    csetm w8, mi
+; CHECK-GI-NOFP16-NEXT:    mov v16.16b, v7.16b
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[3], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    cset w8, mi
+; CHECK-GI-NOFP16-NEXT:    mov v5.h[3], v19.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v16.h[1], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    fcmp s17, s18
+; CHECK-GI-NOFP16-NEXT:    fmov s17, w8
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[4], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v5.h[4], v17.h[0]
+; CHECK-GI-NOFP16-NEXT:    cset w8, mi
 ; CHECK-GI-NOFP16-NEXT:    fcmp s0, s1
-; CHECK-GI-NOFP16-NEXT:    mov v4.h[6], w8
-; CHECK-GI-NOFP16-NEXT:    csetm w8, mi
-; CHECK-GI-NOFP16-NEXT:    mov v4.h[7], w8
-; CHECK-GI-NOFP16-NEXT:    mov v0.16b, v4.16b
-; CHECK-GI-NOFP16-NEXT:    bsl v0.16b, v2.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT:    mov v16.h[2], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[5], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    cset w8, mi
+; CHECK-GI-NOFP16-NEXT:    mov v5.h[5], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w8
+; CHECK-GI-NOFP16-NEXT:    mov v16.h[3], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[6], v6.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v5.h[6], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v16.h[4], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v4.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v5.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v16.h[5], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    neg v0.8h, v4.8h
+; CHECK-GI-NOFP16-NEXT:    ushl v1.8h, v5.8h, v4.8h
+; CHECK-GI-NOFP16-NEXT:    mov v16.h[6], v7.h[0]
+; CHECK-GI-NOFP16-NEXT:    mov v16.h[7], v0.h[0]
+; CHECK-GI-NOFP16-NEXT:    sshl v0.8h, v1.8h, v0.8h
+; CHECK-GI-NOFP16-NEXT:    eor v1.16b, v0.16b, v16.16b
+; CHECK-GI-NOFP16-NEXT:    and v0.16b, v2.16b, v0.16b
+; CHECK-GI-NOFP16-NEXT:    and v1.16b, v3.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: v7f16_half:
@@ -1112,90 +1137,110 @@ define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32
 ;
 ; CHECK-GI-NOFP16-LABEL: v7f16_i32:
 ; CHECK-GI-NOFP16:       // %bb.0: // %entry
-; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[1]
-; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[2]
-; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[2]
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h1
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h0
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT:    mov w13, #31 // =0x1f
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h0
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h1
+; CHECK-GI-NOFP16-NEXT:    ldr s17, [sp, #40]
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[2]
+; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[2]
+; CHECK-GI-NOFP16-NEXT:    fmov s16, w0
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
 ; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
+; CHECK-GI-NOFP16-NEXT:    fcmp s4, s5
+; CHECK-GI-NOFP16-NEXT:    mov h4, v0.h[3]
+; CHECK-GI-NOFP16-NEXT:    mov h5, v1.h[3]
+; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
+; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
+; CHECK-GI-NOFP16-NEXT:    mov v16.s[1], w1
+; CHECK-GI-NOFP16-NEXT:    cset w9, mi
+; CHECK-GI-NOFP16-NEXT:    fcmp s2, s3
+; CHECK-GI-NOFP16-NEXT:    mov h2, v0.h[4]
+; CHECK-GI-NOFP16-NEXT:    mov h3, v1.h[4]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s4, h4
 ; CHECK-GI-NOFP16-NEXT:    fcvt s5, h5
-; CHECK-GI-NOFP16-NEXT:    fcmp s3, s2
-; CHECK-GI-NOFP16-NEXT:    mov h2, v1.h[3]
-; CHECK-GI-NOFP16-NEXT:    mov h3, v0.h[3]
-; CHECK-GI-NOFP16-NEXT:    csetm w8, mi
-; CHECK-GI-NOFP16-NEXT:    fcmp s7, s6
-; CHECK-GI-NOFP16-NEXT:    mov h6, v1.h[5]
-; CHECK-GI-NOFP16-NEXT:    mov h7, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov v16.s[2], w2
+; CHECK-GI-NOFP16-NEXT:    cset w10, mi
+; CHECK-GI-NOFP16-NEXT:    fcmp s6, s7
+; CHECK-GI-NOFP16-NEXT:    mov h6, v0.h[5]
+; CHECK-GI-NOFP16-NEXT:    mov h7, v1.h[5]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s2, h2
+; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[6]
 ; CHECK-GI-NOFP16-NEXT:    fcvt s3, h3
-; CHECK-GI-NOFP16-NEXT:    csetm w9, mi
-; CHECK-GI-NOFP16-NEXT:    fcmp s5, s4
-; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[4]
-; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[4]
-; CHECK-GI-NOFP16-NEXT:    fcvt s6, h6
-; CHECK-GI-NOFP16-NEXT:    fcvt s7, h7
-; CHECK-GI-NOFP16-NEXT:    csetm w10, mi
-; CHECK-GI-NOFP16-NEXT:    fcmp s3, s2
-; CHECK-GI-NOFP16-NEXT:    fcvt s2, h4
-; CHECK-GI-NOFP16-NEXT:    mov h4, v1.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[7]
-; CHECK-GI-NOFP16-NEXT:    fcvt s3, h5
-; CHECK-GI-NOFP16-NEXT:    mov h5, v0.h[6]
-; CHECK-GI-NOFP16-NEXT:    mov h0, v0.h[7]
-; CHECK-GI-NOFP16-NEXT:    csetm w11, mi
-; CHECK-GI-NOFP16-NEXT:    fcmp s7, s6
-; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT:    csetm w12, mi
-; CHECK-GI-NOFP16-NEXT:    fcmp s3, s2
-; CHECK-GI-NOFP16-NEXT:    fcvt s2, h4
-; CHECK-GI-NOFP16-NEXT:    fcvt s3, h5
-; CHECK-GI-NOFP16-NEXT:    fmov s4, w9
+; CHECK-GI-NOFP16-NEXT:    mov h1, v1.h[6]
+; CHECK-GI-NOFP16-NEXT:    cset w11, mi
+; CHECK-GI-NOFP16-NEXT:    fcmp s4, s5
+; CHECK-GI-NOFP16-NEXT:    mov v16.s[3], w3
+; CHECK-GI-NOFP16-NEXT:    fcvt s4, h6
+; CHECK-GI-NOFP16-NEXT:    ldr s6, [sp, #32]
+; CHECK-GI-NOFP16-NEXT:    fcvt s5, h7
 ; CHECK-GI-NOFP16-NEXT:    fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT:    add x9, sp, #8
-; CHECK-GI-NOFP16-NEXT:    csetm w13, mi
-; CHECK-GI-NOFP16-NEXT:    fmov s5, w13
-; CHECK-GI-NOFP16-NEXT:    mov v4.h[1], w8
-; CHECK-GI-NOFP16-NEXT:    mov x8, sp
-; CHECK-GI-NOFP16-NEXT:    fcmp s3, s2
-; CHECK-GI-NOFP16-NEXT:    fmov s2, w7
-; CHECK-GI-NOFP16-NEXT:    fmov s3, w0
-; CHECK-GI-NOFP16-NEXT:    mov v5.h[1], w12
-; CHECK-GI-NOFP16-NEXT:    ld1 { v2.s }[1], [x8]
-; CHECK-GI-NOFP16-NEXT:    mov v3.s[1], w1
-; CHECK-GI-NOFP16-NEXT:    csetm w8, mi
-; CHECK-GI-NOFP16-NEXT:    mov v4.h[2], w10
+; CHECK-GI-NOFP16-NEXT:    fmov s7, w4
+; CHECK-GI-NOFP16-NEXT:    cset w8, mi
+; CHECK-GI-NOFP16-NEXT:    fcmp s2, s3
+; CHECK-GI-NOFP16-NEXT:    fcvt s1, h1
+; CHECK-GI-NOFP16-NEXT:    fmov s2, w13
+; CHECK-GI-NOFP16-NEXT:    mov v7.s[1], w5
+; CHECK-GI-NOFP16-NEXT:    cset w12, mi
+; CHECK-GI-NOFP16-NEXT:    fcmp s4, s5
+; CHECK-GI-NOFP16-NEXT:    ldr s5, [sp]
+; CHECK-GI-NOFP16-NEXT:    fmov s3, w12
+; CHECK-GI-NOFP16-NEXT:    mov v2.s[1], w13
+; CHECK-GI-NOFP16-NEXT:    cset w14, mi
 ; CHECK-GI-NOFP16-NEXT:    fcmp s0, s1
-; CHECK-GI-NOFP16-NEXT:    fmov s1, w4
-; CHECK-GI-NOFP16-NEXT:    ldr s0, [sp, #24]
-; CHECK-GI-NOFP16-NEXT:    mov v5.h[2], w8
-; CHECK-GI-NOFP16-NEXT:    ld1 { v2.s }[2], [x9]
-; CHECK-GI-NOFP16-NEXT:    add x9, sp, #32
-; CHECK-GI-NOFP16-NEXT:    mov v3.s[2], w2
-; CHECK-GI-NOFP16-NEXT:    mov v1.s[1], w5
-; CHECK-GI-NOFP16-NEXT:    csetm w8, mi
-; CHECK-GI-NOFP16-NEXT:    mov v4.h[3], w11
-; CHECK-GI-NOFP16-NEXT:    ld1 { v0.s }[1], [x9]
-; CHECK-GI-NOFP16-NEXT:    mov v5.h[3], w8
-; CHECK-GI-NOFP16-NEXT:    add x8, sp, #16
-; CHECK-GI-NOFP16-NEXT:    ld1 { v2.s }[3], [x8]
-; CHECK-GI-NOFP16-NEXT:    mov v3.s[3], w3
-; CHECK-GI-NOFP16-NEXT:    add x8, sp, #40
-; CHECK-GI-NOFP16-NEXT:    mov v1.s[2], w6
-; CHECK-GI-NOFP16-NEXT:    sshll v4.4s, v4.4h, #0
-; CHECK-GI-NOFP16-NEXT:    ld1 { v0.s }[2], [x8]
-; CHECK-GI-NOFP16-NEXT:    sshll v5.4s, v5.4h, #0
-; CHECK-GI-NOFP16-NEXT:    bit v2.16b, v3.16b, v4.16b
-; CHECK-GI-NOFP16-NEXT:    bit v0.16b, v1.16b, v5.16b
-; CHECK-GI-NOFP16-NEXT:    mov w1, v2.s[1]
-; CHECK-GI-NOFP16-NEXT:    mov w2, v2.s[2]
-; CHECK-GI-NOFP16-NEXT:    mov w3, v2.s[3]
-; CHECK-GI-NOFP16-NEXT:    fmov w0, s2
-; CHECK-GI-NOFP16-NEXT:    mov w5, v0.s[1]
-; CHECK-GI-NOFP16-NEXT:    mov w6, v0.s[2]
-; CHECK-GI-NOFP16-NEXT:    fmov w4, s0
+; CHECK-GI-NOFP16-NEXT:    fmov s0, w9
+; CHECK-GI-NOFP16-NEXT:    mov v3.s[1], w14
+; CHECK-GI-NOFP16-NEXT:    ldr s1, [sp, #24]
+; CHECK-GI-NOFP16-NEXT:    mov v7.s[2], w6
+; CHECK-GI-NOFP16-NEXT:    mov v2.s[2], w13
+; CHECK-GI-NOFP16-NEXT:    cset w9, mi
+; CHECK-GI-NOFP16-NEXT:    mov v0.s[1], w10
+; CHECK-GI-NOFP16-NEXT:    fmov w10, s5
+; CHECK-GI-NOFP16-NEXT:    fmov s5, w7
+; CHECK-GI-NOFP16-NEXT:    mov v1.s[1], v6.s[0]
+; CHECK-GI-NOFP16-NEXT:    ldr s6, [sp, #8]
+; CHECK-GI-NOFP16-NEXT:    mov v3.s[2], w9
+; CHECK-GI-NOFP16-NEXT:    mov w9, #-1 // =0xffffffff
+; CHECK-GI-NOFP16-NEXT:    fmov s4, w9
+; CHECK-GI-NOFP16-NEXT:    mov v2.s[3], w8
+; CHECK-GI-NOFP16-NEXT:    mov v0.s[2], w11
+; CHECK-GI-NOFP16-NEXT:    mov v5.s[1], w10
+; CHECK-GI-NOFP16-NEXT:    mov v1.s[2], v17.s[0]
+; CHECK-GI-NOFP16-NEXT:    mov v4.s[1], w9
+; CHECK-GI-NOFP16-NEXT:    mov v3.s[3], w8
+; CHECK-GI-NOFP16-NEXT:    neg v18.4s, v2.4s
+; CHECK-GI-NOFP16-NEXT:    mov v0.s[3], w8
+; CHECK-GI-NOFP16-NEXT:    fmov w8, s6
+; CHECK-GI-NOFP16-NEXT:    mov v4.s[2], w9
+; CHECK-GI-NOFP16-NEXT:    ushl v2.4s, v3.4s, v2.4s
+; CHECK-GI-NOFP16-NEXT:    ldr s3, [sp, #16]
+; CHECK-GI-NOFP16-NEXT:    mov v5.s[2], w8
+; CHECK-GI-NOFP16-NEXT:    mov v7.s[3], w8
+; CHECK-GI-NOFP16-NEXT:    shl v0.4s, v0.4s, #31
+; CHECK-GI-NOFP16-NEXT:    sshl v2.4s, v2.4s, v18.4s
+; CHECK-GI-NOFP16-NEXT:    mov v4.s[3], w8
+; CHECK-GI-NOFP16-NEXT:    fmov w8, s3
+; CHECK-GI-NOFP16-NEXT:    mov v1.s[3], v0.s[0]
+; CHECK-GI-NOFP16-NEXT:    sshr v0.4s, v0.4s, #31
+; CHECK-GI-NOFP16-NEXT:    mov v5.s[3], w8
+; CHECK-GI-NOFP16-NEXT:    eor v3.16b, v2.16b, v4.16b
+; CHECK-GI-NOFP16-NEXT:    and v2.16b, v7.16b, v2.16b
+; CHECK-GI-NOFP16-NEXT:    and v1.16b, v1.16b, v3.16b
+; CHECK-GI-NOFP16-NEXT:    bsl v0.16b, v16.16b, v5.16b
+; CHECK-GI-NOFP16-NEXT:    orr v1.16b, v2.16b, v1.16b
+; CHECK-GI-NOFP16-NEXT:    mov s2, v0.s[1]
+; CHECK-GI-NOFP16-NEXT:    mov s3, v0.s[2]
+; CHECK-GI-NOFP16-NEXT:    mov s4, v0.s[3]
+; CHECK-GI-NOFP16-NEXT:    fmov w0, s0
+; CHECK-GI-NOFP16-NEXT:    mov s5, v1.s[1]
+; CHECK-GI-NOFP16-NEXT:    mov s6, v1.s[2]
+; CHECK-GI-NOFP16-NEXT:    fmov w4, s1
+; CHECK-GI-NOFP16-NEXT:    fmov w1, s2
+; CHECK-GI-NOFP16-NEXT:    fmov w2, s3
+; CHECK-GI-NOFP16-NEXT:    fmov w3, s4
+; CHECK-GI-NOFP16-NEXT:    fmov w5, s5
+; CHECK-GI-NOFP16-NEXT:    fmov w6, s6
 ; CHECK-GI-NOFP16-NEXT:    ret
 ;
 ; CHECK-GI-FP16-LABEL: v7f16_i32:
diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll
index dd53780be14c168..f319721e0f2f0f2 100644
--- a/llvm/test/CodeGen/AArch64/sext.ll
+++ b/llvm/test/CodeGen/AArch64/sext.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
 ; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
 
-; CHECK-GI:       warning: Instruction selection used fallback path for sext_v3i8_v3i32
-; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for sext_v3i10_v3i32
-
 define i16 @sext_i8_to_i16(i8 %a) {
 ; CHECK-LABEL: sext_i8_to_i16:
 ; CHECK:       // %bb.0: // %entry
@@ -236,15 +233,31 @@ entry:
 }
 
 define <3 x i32> @sext_v3i8_v3i32(<3 x i8> %a) {
-; CHECK-LABEL: sext_v3i8_v3i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov s0, w0
-; CHECK-NEXT:    mov v0.h[1], w1
-; CHECK-NEXT:    mov v0.h[2], w2
-; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-NEXT:    shl v0.4s, v0.4s, #24
-; CHECK-NEXT:    sshr v0.4s, v0.4s, #24
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: sext_v3i8_v3i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    mov v0.h[1], w1
+; CHECK-SD-NEXT:    mov v0.h[2], w2
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    shl v0.4s, v0.4s, #24
+; CHECK-SD-NEXT:    sshr v0.4s, v0.4s, #24
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v3i8_v3i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, #24 // =0x18
+; CHECK-GI-NEXT:    fmov s1, w0
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v1.s[1], w1
+; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    mov v1.s[2], w2
+; CHECK-GI-NEXT:    mov v0.s[2], w8
+; CHECK-GI-NEXT:    mov v1.s[3], w8
+; CHECK-GI-NEXT:    mov v0.s[3], w8
+; CHECK-GI-NEXT:    neg v2.4s, v0.4s
+; CHECK-GI-NEXT:    ushl v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    sshl v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <3 x i8> %a to <3 x i32>
   ret <3 x i32> %c
@@ -388,15 +401,31 @@ entry:
 }
 
 define <3 x i32> @sext_v3i10_v3i32(<3 x i10> %a) {
-; CHECK-LABEL: sext_v3i10_v3i32:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fmov s0, w0
-; CHECK-NEXT:    mov v0.h[1], w1
-; CHECK-NEXT:    mov v0.h[2], w2
-; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
-; CHECK-NEXT:    shl v0.4s, v0.4s, #22
-; CHECK-NEXT:    sshr v0.4s, v0.4s, #22
-; CHECK-NEXT:    ret
+; CHECK-SD-LABEL: sext_v3i10_v3i32:
+; CHECK-SD:       // %bb.0: // %entry
+; CHECK-SD-NEXT:    fmov s0, w0
+; CHECK-SD-NEXT:    mov v0.h[1], w1
+; CHECK-SD-NEXT:    mov v0.h[2], w2
+; CHECK-SD-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT:    shl v0.4s, v0.4s, #22
+; CHECK-SD-NEXT:    sshr v0.4s, v0.4s, #22
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: sext_v3i10_v3i32:
+; CHECK-GI:       // %bb.0: // %entry
+; CHECK-GI-NEXT:    mov w8, #22 // =0x16
+; CHECK-GI-NEXT:    fmov s1, w0
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v1.s[1], w1
+; CHECK-GI-NEXT:    mov v0.s[1], w8
+; CHECK-GI-NEXT:    mov v1.s[2], w2
+; CHECK-GI-NEXT:    mov v0.s[2], w8
+; CHECK-GI-NEXT:    mov v1.s[3], w8
+; CHECK-GI-NEXT:    mov v0.s[3], w8
+; CHECK-GI-NEXT:    neg v2.4s, v0.4s
+; CHECK-GI-NEXT:    ushl v0.4s, v1.4s, v0.4s
+; CHECK-GI-NEXT:    sshl v0.4s, v0.4s, v2.4s
+; CHECK-GI-NEXT:    ret
 entry:
   %c = sext <3 x i10> %a to <3 x i32>
   ret <3 x i32> %c
diff --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll
new file mode 100644
index 000000000000000..15c8e1792f3d313
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/shift.ll
@@ -0,0 +1,1091 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ...
[truncated]

Copy link
Collaborator

@davemgreen davemgreen left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This LGTM. We will have to try to do something about the ANYEXT too.

@chuongg3 chuongg3 merged commit bfef161 into llvm:main Jan 22, 2024
3 of 4 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

3 participants