
[DAGCombiner][AArch64] Fix incorrect cast VT in takeInexpensiveLog2 #67161

Closed

Conversation

goldsteinn
Contributor

Previously, we were taking CurVT before finalizing ToCast, which
meant potentially returning an SDValue with an illegal ValueType
for the operation.

The fix is to take CurVT only after ToCast has been finalized with
PeekThroughCastsAndTrunc.
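
For illustration, the shape of input that hits this path looks roughly like the quantum_hadamard test added in the diff below: the shift amount reaches takeInexpensiveLog2 through a zext, so peeking through casts changes the value type of the node being cast. A minimal sketch (function name and constants are illustrative, not taken from the PR):

define float @log2_peek_example(i32 %x) {
  %ext = zext i32 %x to i64            ; PeekThroughCastsAndTrunc strips this zext,
  %shl = shl nuw i64 1, %ext           ; so the peeked node has type i32, not i64
  %cvt = uitofp i64 %shl to double
  %div = fdiv double 1.000000e+00, %cvt ; fdiv by a power of 2 invokes takeInexpensiveLog2
  %res = fptrunc double %div to float
  ret float %res
}

Reading CurVT before peeking would compare NewVT against the stale i64 type and could return the i32 node unchanged where an i64 value was expected.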

@llvmbot added the backend:AArch64 and llvm:SelectionDAG labels on Sep 22, 2023
@llvmbot
Collaborator

llvmbot commented Sep 22, 2023

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-selectiondag

Changes

Previously, we were taking CurVT before finalizing ToCast, which
meant potentially returning an SDValue with an illegal ValueType
for the operation.

The fix is to take CurVT only after ToCast has been finalized with
PeekThroughCastsAndTrunc.


Full diff: https://github.com/llvm/llvm-project/pull/67161.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+1-1)
  • (added) llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll (+444)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 693523e737acf66..7d5dc96bd0e2c94 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27323,8 +27323,8 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
     return SDValue();
 
   auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
-    EVT CurVT = ToCast.getValueType();
     ToCast = PeekThroughCastsAndTrunc(ToCast);
+    EVT CurVT = ToCast.getValueType();
     if (NewVT == CurVT)
       return ToCast;
 
diff --git a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
new file mode 100644
index 000000000000000..b433e61cd18732e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -0,0 +1,444 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NO-NEON
+
+declare i16 @llvm.umax.i16(i16, i16)
+declare i64 @llvm.umin.i64(i64, i64)
+
+declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)
+
+define <4 x float> @fmul_pow2_4xfloat(<4 x i32> %i) {
+; CHECK-LABEL: fmul_pow2_4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    ushl v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    fmov v1.4s, #9.00000000
+; CHECK-NEXT:    ucvtf v0.4s, v0.4s
+; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
+  %p2_f = uitofp <4 x i32> %p2 to <4 x float>
+  %r = fmul <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
+  ret <4 x float> %r
+}
+
+define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
+; CHECK-LABEL: fmul_pow2_ldexp_4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov w0, v0.s[1]
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    fmov s0, #9.00000000
+; CHECK-NEXT:    bl ldexpf
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    fmov s0, #9.00000000
+; CHECK-NEXT:    fmov w0, s1
+; CHECK-NEXT:    bl ldexpf
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov w0, v0.s[2]
+; CHECK-NEXT:    fmov s0, #9.00000000
+; CHECK-NEXT:    bl ldexpf
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    mov v1.s[2], v0.s[0]
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov w0, v0.s[3]
+; CHECK-NEXT:    fmov s0, #9.00000000
+; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl ldexpf
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    mov v1.s[3], v0.s[0]
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
+  %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
+  ret <4 x float> %r
+}
+
+define <4 x float> @fdiv_pow2_4xfloat(<4 x i32> %i) {
+; CHECK-LABEL: fdiv_pow2_4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov v1.4s, #9.00000000
+; CHECK-NEXT:    shl v0.4s, v0.4s, #23
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+  %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
+  %p2_f = uitofp <4 x i32> %p2 to <4 x float>
+  %r = fdiv <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
+  ret <4 x float> %r
+}
+
+define double @fmul_pow_shl_cnt(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    fmov d1, #9.00000000
+; CHECK-NEXT:    lsl x8, x8, x0
+; CHECK-NEXT:    ucvtf d0, x8
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
+  %shl = shl nuw i64 1, %cnt
+  %conv = uitofp i64 %shl to double
+  %mul = fmul double 9.000000e+00, %conv
+  ret double %mul
+}
+
+define double @fmul_pow_shl_cnt2(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEXT:    fmov d1, #-9.00000000
+; CHECK-NEXT:    lsl x8, x8, x0
+; CHECK-NEXT:    ucvtf d0, x8
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
+  %shl = shl nuw i64 2, %cnt
+  %conv = uitofp i64 %shl to double
+  %mul = fmul double -9.000000e+00, %conv
+  ret double %mul
+}
+
+define float @fmul_pow_select(i32 %cnt, i1 %c) nounwind {
+; CHECK-LABEL: fmul_pow_select:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    tst w1, #0x1
+; CHECK-NEXT:    fmov s1, #9.00000000
+; CHECK-NEXT:    cinc w8, w8, eq
+; CHECK-NEXT:    lsl w8, w8, w0
+; CHECK-NEXT:    ucvtf s0, w8
+; CHECK-NEXT:    fmul s0, s0, s1
+; CHECK-NEXT:    ret
+  %shl2 = shl nuw i32 2, %cnt
+  %shl1 = shl nuw i32 1, %cnt
+  %shl = select i1 %c, i32 %shl1, i32 %shl2
+  %conv = uitofp i32 %shl to float
+  %mul = fmul float 9.000000e+00, %conv
+  ret float %mul
+}
+
+define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_fly_pow_mul_min_pow2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #8 // =0x8
+; CHECK-NEXT:    mov w9, #8192 // =0x2000
+; CHECK-NEXT:    fmov s1, #9.00000000
+; CHECK-NEXT:    lsl x8, x8, x0
+; CHECK-NEXT:    cmp x8, #2, lsl #12 // =8192
+; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    ucvtf s0, x8
+; CHECK-NEXT:    fmul s0, s0, s1
+; CHECK-NEXT:    ret
+  %shl8 = shl nuw i64 8, %cnt
+  %shl = call i64 @llvm.umin.i64(i64 %shl8, i64 8192)
+  %conv = uitofp i64 %shl to float
+  %mul = fmul float 9.000000e+00, %conv
+  ret float %mul
+}
+
+define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_mul_max_pow2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEXT:    mov w9, #1 // =0x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    fmov d1, #3.00000000
+; CHECK-NEXT:    lsl w8, w8, w0
+; CHECK-NEXT:    lsl w9, w9, w0
+; CHECK-NEXT:    and w8, w8, #0xfffe
+; CHECK-NEXT:    and w9, w9, #0xffff
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    csel w8, w9, w8, hi
+; CHECK-NEXT:    ucvtf d0, w8
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
+  %shl2 = shl nuw i16 2, %cnt
+  %shl1 = shl nuw i16 1, %cnt
+  %shl = call i16 @llvm.umax.i16(i16 %shl1, i16 %shl2)
+  %conv = uitofp i16 %shl to double
+  %mul = fmul double 3.000000e+00, %conv
+  ret double %mul
+}
+
+define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl x8, x0, x1
+; CHECK-NEXT:    fmov d1, #9.00000000
+; CHECK-NEXT:    ucvtf d0, x8
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
+  %shl = shl nuw i64 %v, %cnt
+  %conv = uitofp i64 %shl to double
+  %mul = fmul double 9.000000e+00, %conv
+  ret double %mul
+}
+
+define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    fmov v1.2s, #15.00000000
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x float>
+  %mul = fmul <2 x float> <float 15.000000e+00, float 15.000000e+00>, %conv
+  ret <2 x float> %mul
+}
+
+define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    fmov v1.2d, #15.00000000
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x double>
+  %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
+  ret <2 x double> %mul
+}
+
+define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float> %add) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.4s, #2
+; CHECK-NEXT:    ushl v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    fmov v2.4s, #5.00000000
+; CHECK-NEXT:    ucvtf v0.4s, v0.4s
+; CHECK-NEXT:    fmul v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %shl = shl nsw nuw <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %cnt
+  %conv = uitofp <4 x i32> %shl to <4 x float>
+  %mul = fmul <4 x float> <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>, %conv
+  %res = fadd <4 x float> %mul, %add
+  ret <4 x float> %res
+}
+
+define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    adrp x8, .LCPI12_0
+; CHECK-NEXT:    ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x double>
+  %mul = fmul <2 x double> <double 15.000000e+00, double 14.000000e+00>, %conv
+  ret <2 x double> %mul
+}
+
+define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI13_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT:    ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    fmov v1.2d, #15.00000000
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %shl = shl nsw nuw <2 x i64> <i64 2, i64 1>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x double>
+  %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
+  ret <2 x double> %mul
+}
+
+
+define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    lsl x8, x8, x0
+; CHECK-NEXT:    ucvtf d0, x8
+; CHECK-NEXT:    adrp x8, .LCPI14_0
+; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
+  %shl = shl nuw i64 1, %cnt
+  %conv = uitofp i64 %shl to double
+  %mul = fmul double 9.745314e+288, %conv
+  ret double %mul
+}
+
+define double @fmul_pow_shl_cnt_safe(i16 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_safe:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    lsl w8, w8, w0
+; CHECK-NEXT:    and w8, w8, #0xffff
+; CHECK-NEXT:    ucvtf d0, w8
+; CHECK-NEXT:    adrp x8, .LCPI15_0
+; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI15_0]
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
+  %shl = shl nuw i16 1, %cnt
+  %conv = uitofp i16 %shl to double
+  %mul = fmul double 9.745314e+288, %conv
+  ret double %mul
+}
+
+define <2 x double> @fdiv_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov v1.2d, #1.00000000
+; CHECK-NEXT:    shl v0.2d, v0.2d, #52
+; CHECK-NEXT:    sub v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    ret
+  %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x double>
+  %mul = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %conv
+  ret <2 x double> %mul
+}
+
+define <2 x float> @fdiv_pow_shl_cnt_vec_with_expensive_cast(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    fmov v1.2s, #1.00000000
+; CHECK-NEXT:    shl v0.2s, v0.2s, #23
+; CHECK-NEXT:    sub v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    ret
+  %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x float>
+  %mul = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %conv
+  ret <2 x float> %mul
+}
+
+define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #8 // =0x8
+; CHECK-NEXT:    fmov s1, #-9.00000000
+; CHECK-NEXT:    lsl x8, x8, x0
+; CHECK-NEXT:    ucvtf s0, x8
+; CHECK-NEXT:    fdiv s0, s1, s0
+; CHECK-NEXT:    ret
+  %shl = shl i64 8, %cnt
+  %conv = uitofp i64 %shl to float
+  %mul = fdiv float -9.000000e+00, %conv
+  ret float %mul
+}
+
+define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #8 // =0x8
+; CHECK-NEXT:    fmov s1, #-9.00000000
+; CHECK-NEXT:    lsl x8, x8, x0
+; CHECK-NEXT:    scvtf s0, x8
+; CHECK-NEXT:    fdiv s0, s1, s0
+; CHECK-NEXT:    ret
+  %shl = shl i64 8, %cnt
+  %conv = sitofp i64 %shl to float
+  %mul = fdiv float -9.000000e+00, %conv
+  ret float %mul
+}
+
+define float @fdiv_pow_shl_cnt(i64 %cnt_in) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #8 // =0x8
+; CHECK-NEXT:    and x9, x0, #0x1f
+; CHECK-NEXT:    fmov s1, #-0.50000000
+; CHECK-NEXT:    lsl x8, x8, x9
+; CHECK-NEXT:    scvtf s0, x8
+; CHECK-NEXT:    fdiv s0, s1, s0
+; CHECK-NEXT:    ret
+  %cnt = and i64 %cnt_in, 31
+  %shl = shl i64 8, %cnt
+  %conv = sitofp i64 %shl to float
+  %mul = fdiv float -0.500000e+00, %conv
+  ret float %mul
+}
+
+define double @fdiv_pow_shl_cnt32_to_dbl_okay(i32 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #3936146074321813504 // =0x36a0000000000000
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sub x8, x8, x0, lsl #52
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %shl = shl nuw i32 1, %cnt
+  %conv = uitofp i32 %shl to double
+  %mul = fdiv double 0x36A0000000000000, %conv
+  ret double %mul
+}
+
+define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    lsl w8, w8, w0
+; CHECK-NEXT:    ucvtf s0, w8
+; CHECK-NEXT:    mov w8, #65528 // =0xfff8
+; CHECK-NEXT:    movk w8, #4351, lsl #16
+; CHECK-NEXT:    fmov s1, w8
+; CHECK-NEXT:    fdiv s0, s1, s0
+; CHECK-NEXT:    ret
+  %shl = shl nuw i32 1, %cnt
+  %conv = uitofp i32 %shl to float
+  %mul = fdiv float 0x3a1fffff00000000, %conv
+  ret float %mul
+}
+
+define float @fdiv_pow_shl_cnt32_okay(i32 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt32_okay:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #285212672 // =0x11000000
+; CHECK-NEXT:    sub w8, w8, w0, lsl #23
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %shl = shl nuw i32 1, %cnt
+  %conv = uitofp i32 %shl to float
+  %mul = fdiv float 0x3a20000000000000, %conv
+  ret float %mul
+}
+
+define fastcc i1 @quantum_hadamard(i32 %0) {
+; CHECK-LABEL: quantum_hadamard:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sub x8, x8, x0, lsl #52
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fcvt s0, d0
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ret
+  %2 = zext i32 %0 to i64
+  %3 = shl i64 1, %2
+  %4 = uitofp i64 %3 to double
+  %5 = fdiv double 1.000000e+00, %4
+  %6 = fptrunc double %5 to float
+  %7 = fcmp olt float 0.000000e+00, %6
+  ret i1 %7
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-NEON: {{.*}}
+; CHECK-NO-NEON: {{.*}}

@davemgreen (Collaborator) left a comment

Thanks for the fix.

@@ -0,0 +1,444 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON

+neon should be the default, so the no-neon run likely needs -mattr=-neon.
That is, if you feel you need to test without neon at all; it might not be necessary?
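
For reference, the suggested no-neon RUN line would presumably look like this (my sketch of the reviewer's suggestion, not a line from the PR):

; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=-neon | FileCheck %s --check-prefixes=CHECK,CHECK-NO-NEON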

Previously, we were taking `CurVT` before finalizing `ToCast`, which
meant potentially returning an `SDValue` with an illegal `ValueType`
for the operation.

The fix is to take `CurVT` only after `ToCast` has been finalized with
`PeekThroughCastsAndTrunc`.
@goldsteinn force-pushed the goldsteinn/fix-take-inexepensive-log2 branch from a1c8ace to 7a776a0 on September 23, 2023
@goldsteinn
Contributor Author

Pushed this with bc38c42; guess I messed up linking the PR <-> commit. But closing.

@goldsteinn goldsteinn closed this Sep 23, 2023