[AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection #86972

marcauberer · 2024-03-28T16:25:09Z

Fixes #86917

FCMP_TRUE and FCMP_FALSE were previously not considered and we ended up in an llvm_unreachable assertion.

…tion

llvmbot · 2024-03-28T16:25:45Z

@llvm/pr-subscribers-backend-aarch64

Author: Marc Auberer (marcauberer)

Changes

Fixes #86917

FCMP_TRUE and FCMP_FALSE were previously not considered and we ended up in an llvm_unreachable assertion.

Full diff: https://github.com/llvm/llvm-project/pull/86972.diff

3 Files Affected:

(modified) llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp (+6)
(modified) llvm/test/CodeGen/AArch64/GlobalISel/select.mir (+20)
(modified) llvm/test/CodeGen/AArch64/neon-compare-instructions.ll (+101)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index 92db89cc0915b8..80fe4bcb8b58f7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -147,6 +147,12 @@ void AArch64GISelUtils::changeFCMPPredToAArch64CC(
   case CmpInst::FCMP_UNE:
     CondCode = AArch64CC::NE;
     break;
+  case CmpInst::FCMP_TRUE:
+    CondCode = AArch64CC::AL;
+    break;
+  case CmpInst::FCMP_FALSE:
+    CondCode = AArch64CC::NV;
+    break;
   }
 }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
index bcdd77ac57570d..e207a31063bacf 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
@@ -183,6 +183,14 @@ registers:
   - { id: 5, class: gpr }
   - { id: 6, class: gpr }
   - { id: 7, class: gpr }
+  - { id: 8, class: fpr }
+  - { id: 9, class: gpr }
+  - { id: 10, class: fpr }
+  - { id: 11, class: gpr }
+  - { id: 12, class: gpr }
+  - { id: 13, class: gpr }
+  - { id: 14, class: gpr }
+  - { id: 15, class: gpr }
 
 # CHECK:  body:
 # CHECK:    nofpexcept FCMPSrr %0, %0, implicit-def $nzcv
@@ -209,6 +217,18 @@ body:             |
     %7(s32) = G_ANYEXT %5
     $w0 = COPY %7(s32)
 
+    %8(s32) = COPY $s0
+    %9(s32) = G_FCMP floatpred(true), %8, %8
+    %12(s8) = G_TRUNC %9(s32)
+    %14(s32) = G_ANYEXT %12
+    $w0 = COPY %14(s32)
+
+    %10(s64) = COPY $d0
+    %11(s32) = G_FCMP floatpred(false), %10, %10
+    %13(s8) = G_TRUNC %11(s32)
+    %15(s32) = G_ANYEXT %13
+    $w0 = COPY %15(s32)
+
 ...
 
 ---
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 632b6b32625022..dbb5dfebd44abc 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -2870,6 +2870,107 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) {
   ret <2 x i64> %tmp4
 }
 
+define <2 x i32> @fcmal2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-SD-LABEL: fcmal2xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmal2xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2s, #1
+; CHECK-GI-NEXT:    shl v0.2s, v0.2s, #31
+; CHECK-GI-NEXT:    sshr v0.2s, v0.2s, #31
+; CHECK-GI-NEXT:    ret
+  %tmp3 = fcmp true <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-SD-LABEL: fcmal4xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmal4xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #1 // =0x1
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT:    mov v0.h[1], v0.h[0]
+; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT:    ret
+  %tmp3 = fcmp true <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmal2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-SD-LABEL: fcmal2xdouble:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmal2xdouble:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI221_0
+; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI221_0]
+; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #63
+; CHECK-GI-NEXT:    sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT:    ret
+  %tmp3 = fcmp true <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmnv2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmnv2xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
+  %tmp3 = fcmp false <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-SD-LABEL: fcmnv4xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmnv4xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #0 // =0x0
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT:    mov v0.h[1], v0.h[0]
+; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT:    ret
+  %tmp3 = fcmp false <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmnv2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmnv2xdouble:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
+  %tmp3 = fcmp false <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
 define <2 x i32> @fcmoeqz2xfloat(<2 x float> %A) {
 ; CHECK-LABEL: fcmoeqz2xfloat:
 ; CHECK:       // %bb.0:

llvmbot · 2024-03-28T16:25:45Z

@llvm/pr-subscribers-llvm-globalisel

Author: Marc Auberer (marcauberer)

Changes

Fixes #86917

FCMP_TRUE and FCMP_FALSE were previously not considered and we ended up in an llvm_unreachable assertion.

Full diff: https://github.com/llvm/llvm-project/pull/86972.diff

3 Files Affected:

(modified) llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp (+6)
(modified) llvm/test/CodeGen/AArch64/GlobalISel/select.mir (+20)
(modified) llvm/test/CodeGen/AArch64/neon-compare-instructions.ll (+101)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
index 92db89cc0915b8..80fe4bcb8b58f7 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp
@@ -147,6 +147,12 @@ void AArch64GISelUtils::changeFCMPPredToAArch64CC(
   case CmpInst::FCMP_UNE:
     CondCode = AArch64CC::NE;
     break;
+  case CmpInst::FCMP_TRUE:
+    CondCode = AArch64CC::AL;
+    break;
+  case CmpInst::FCMP_FALSE:
+    CondCode = AArch64CC::NV;
+    break;
   }
 }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
index bcdd77ac57570d..e207a31063bacf 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select.mir
@@ -183,6 +183,14 @@ registers:
   - { id: 5, class: gpr }
   - { id: 6, class: gpr }
   - { id: 7, class: gpr }
+  - { id: 8, class: fpr }
+  - { id: 9, class: gpr }
+  - { id: 10, class: fpr }
+  - { id: 11, class: gpr }
+  - { id: 12, class: gpr }
+  - { id: 13, class: gpr }
+  - { id: 14, class: gpr }
+  - { id: 15, class: gpr }
 
 # CHECK:  body:
 # CHECK:    nofpexcept FCMPSrr %0, %0, implicit-def $nzcv
@@ -209,6 +217,18 @@ body:             |
     %7(s32) = G_ANYEXT %5
     $w0 = COPY %7(s32)
 
+    %8(s32) = COPY $s0
+    %9(s32) = G_FCMP floatpred(true), %8, %8
+    %12(s8) = G_TRUNC %9(s32)
+    %14(s32) = G_ANYEXT %12
+    $w0 = COPY %14(s32)
+
+    %10(s64) = COPY $d0
+    %11(s32) = G_FCMP floatpred(false), %10, %10
+    %13(s8) = G_TRUNC %11(s32)
+    %15(s32) = G_ANYEXT %13
+    $w0 = COPY %15(s32)
+
 ...
 
 ---
diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
index 632b6b32625022..dbb5dfebd44abc 100644
--- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -2870,6 +2870,107 @@ define <2 x i64> @fcmune2xdouble(<2 x double> %A, <2 x double> %B) {
   ret <2 x i64> %tmp4
 }
 
+define <2 x i32> @fcmal2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-SD-LABEL: fcmal2xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmal2xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    movi v0.2s, #1
+; CHECK-GI-NEXT:    shl v0.2s, v0.2s, #31
+; CHECK-GI-NEXT:    sshr v0.2s, v0.2s, #31
+; CHECK-GI-NEXT:    ret
+  %tmp3 = fcmp true <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmal4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-SD-LABEL: fcmal4xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmal4xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #1 // =0x1
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT:    mov v0.h[1], v0.h[0]
+; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT:    ret
+  %tmp3 = fcmp true <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmal2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-SD-LABEL: fcmal2xdouble:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.2d, #0xffffffffffffffff
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmal2xdouble:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    adrp x8, .LCPI221_0
+; CHECK-GI-NEXT:    ldr q0, [x8, :lo12:.LCPI221_0]
+; CHECK-GI-NEXT:    shl v0.2d, v0.2d, #63
+; CHECK-GI-NEXT:    sshr v0.2d, v0.2d, #63
+; CHECK-GI-NEXT:    ret
+  %tmp3 = fcmp true <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i32> @fcmnv2xfloat(<2 x float> %A, <2 x float> %B) {
+; CHECK-LABEL: fcmnv2xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
+  %tmp3 = fcmp false <2 x float> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
+  ret <2 x i32> %tmp4
+}
+
+define <4 x i32> @fcmnv4xfloat(<4 x float> %A, <4 x float> %B) {
+; CHECK-SD-LABEL: fcmnv4xfloat:
+; CHECK-SD:       // %bb.0:
+; CHECK-SD-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT:    ret
+;
+; CHECK-GI-LABEL: fcmnv4xfloat:
+; CHECK-GI:       // %bb.0:
+; CHECK-GI-NEXT:    mov w8, #0 // =0x0
+; CHECK-GI-NEXT:    fmov s0, w8
+; CHECK-GI-NEXT:    mov v1.16b, v0.16b
+; CHECK-GI-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-GI-NEXT:    mov v0.h[1], v0.h[0]
+; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
+; CHECK-GI-NEXT:    shl v0.4s, v1.4s, #31
+; CHECK-GI-NEXT:    sshr v0.4s, v0.4s, #31
+; CHECK-GI-NEXT:    ret
+  %tmp3 = fcmp false <4 x float> %A, %B
+  %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
+  ret <4 x i32> %tmp4
+}
+define <2 x i64> @fcmnv2xdouble(<2 x double> %A, <2 x double> %B) {
+; CHECK-LABEL: fcmnv2xdouble:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
+  %tmp3 = fcmp false <2 x double> %A, %B
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
 define <2 x i32> @fcmoeqz2xfloat(<2 x float> %A) {
 ; CHECK-LABEL: fcmoeqz2xfloat:
 ; CHECK:       // %bb.0:

llvm/test/CodeGen/AArch64/GlobalISel/select.mir

marcauberer · 2024-03-28T23:35:47Z

@arsenm How is the procedure for such crash fixes. I am unsure, because this is my first one. Do we need to downport this to release/18.x?

marcauberer · 2024-05-05T14:43:07Z

/cherry-pick c482fad

llvmbot · 2024-05-05T15:05:23Z

/cherry-pick c482fad

Error: Command failed due to missing milestone.

marcauberer · 2024-05-05T15:07:35Z

/cherry-pick c482fad

…tion (llvm#86972) Fixes llvm#86917 `FCMP_TRUE` and `FCMP_FALSE` were previously not considered and we ended up in an llvm_unreachable assertion. (cherry picked from commit c482fad)

llvmbot · 2024-05-05T15:12:45Z

/pull-request #91126

…tion (llvm#86972) Fixes llvm#86917 `FCMP_TRUE` and `FCMP_FALSE` were previously not considered and we ended up in an llvm_unreachable assertion.

…tion (#86972) (#91580) Fixes #86917 `FCMP_TRUE` and `FCMP_FALSE` were previously not considered and we ended up in an llvm_unreachable assertion.

[AArch64][GISEL] Consider fcmp true and fcmp false in cond code selec…

263d3f3

…tion

llvmbot added backend:AArch64 llvm:globalisel labels Mar 28, 2024

marcauberer changed the title ~~[GISEL] Consider fcmp true and fcmp false in cond code selection~~ [AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection Mar 28, 2024

marcauberer requested review from TNorthover and ornata March 28, 2024 16:29

arsenm approved these changes Mar 28, 2024

View reviewed changes

llvm/test/CodeGen/AArch64/GlobalISel/select.mir Show resolved Hide resolved

marcauberer merged commit c482fad into llvm:main Mar 28, 2024
7 checks passed

marcauberer deleted the globalisel/fcmp-true-false-support branch March 28, 2024 22:10

marcauberer added this to the LLVM 18.X Release milestone May 5, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection #86972

[AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection #86972

marcauberer commented Mar 28, 2024

llvmbot commented Mar 28, 2024

llvmbot commented Mar 28, 2024

marcauberer commented Mar 28, 2024

marcauberer commented May 5, 2024 •

edited

llvmbot commented May 5, 2024

marcauberer commented May 5, 2024

llvmbot commented May 5, 2024

[AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection #86972

[AArch64][GISEL] Consider fcmp true and fcmp false in cond code selection #86972

Conversation

marcauberer commented Mar 28, 2024

llvmbot commented Mar 28, 2024

llvmbot commented Mar 28, 2024

marcauberer commented Mar 28, 2024

marcauberer commented May 5, 2024 • edited

llvmbot commented May 5, 2024

marcauberer commented May 5, 2024

llvmbot commented May 5, 2024

marcauberer commented May 5, 2024 •

edited