
[DAGCombiner][AArch64] Fix incorrect cast VT in takeInexpensiveLog2 #67161

Closed

Conversation

goldsteinn
Contributor

Previously, we were taking CurVT before finalizing ToCast, which
meant potentially returning an SDValue with an illegal ValueType
for the operation.

The fix is to take CurVT only after ToCast has been finalized with
PeekThroughCastsAndTrunc.
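
For illustration, the shape of input that hits this path looks roughly like the quantum_hadamard test added in the diff below: the shift amount reaches takeInexpensiveLog2 through a zext, so peeking through casts changes the value type of the node being cast. A minimal sketch (function name and constants are illustrative, not taken from the PR):

define float @log2_peek_example(i32 %x) {
  %ext = zext i32 %x to i64            ; PeekThroughCastsAndTrunc strips this zext,
  %shl = shl nuw i64 1, %ext           ; so the peeked node has type i32, not i64
  %cvt = uitofp i64 %shl to double
  %div = fdiv double 1.000000e+00, %cvt ; fdiv by a power of 2 invokes takeInexpensiveLog2
  %res = fptrunc double %div to float
  ret float %res
}

Reading CurVT before peeking would compare NewVT against the stale i64 type and could return the i32 node unchanged where an i64 value was expected.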

@llvmbot added the backend:AArch64 and llvm:SelectionDAG labels on Sep 22, 2023
@llvmbot
Collaborator

llvmbot commented Sep 22, 2023

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-llvm-selectiondag

Changes

Previously, we were taking CurVT before finalizing ToCast, which
meant potentially returning an SDValue with an illegal ValueType
for the operation.

The fix is to take CurVT only after ToCast has been finalized with
PeekThroughCastsAndTrunc.


Full diff: https://github.com/llvm/llvm-project/pull/67161.diff

2 Files Affected:

  • (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+1-1)
  • (added) llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll (+444)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 693523e737acf66..7d5dc96bd0e2c94 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27323,8 +27323,8 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
     return SDValue();
 
   auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
-    EVT CurVT = ToCast.getValueType();
     ToCast = PeekThroughCastsAndTrunc(ToCast);
+    EVT CurVT = ToCast.getValueType();
     if (NewVT == CurVT)
       return ToCast;
 
diff --git a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
new file mode 100644
index 000000000000000..b433e61cd18732e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -0,0 +1,444 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NO-NEON
+
+declare i16 @llvm.umax.i16(i16, i16)
+declare i64 @llvm.umin.i64(i64, i64)
+
+declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)
+
+define <4 x float> @fmul_pow2_4xfloat(<4 x i32> %i) {
+; CHECK-LABEL: fmul_pow2_4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v1.4s, #1
+; CHECK-NEXT:    ushl v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    fmov v1.4s, #9.00000000
+; CHECK-NEXT:    ucvtf v0.4s, v0.4s
+; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
+  %p2_f = uitofp <4 x i32> %p2 to <4 x float>
+  %r = fmul <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
+  ret <4 x float> %r
+}
+
+define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
+; CHECK-LABEL: fmul_pow2_ldexp_4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov w0, v0.s[1]
+; CHECK-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    fmov s0, #9.00000000
+; CHECK-NEXT:    bl ldexpf
+; CHECK-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    fmov s0, #9.00000000
+; CHECK-NEXT:    fmov w0, s1
+; CHECK-NEXT:    bl ldexpf
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    mov v0.s[1], v1.s[0]
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov w0, v0.s[2]
+; CHECK-NEXT:    fmov s0, #9.00000000
+; CHECK-NEXT:    bl ldexpf
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    mov v1.s[2], v0.s[0]
+; CHECK-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    mov w0, v0.s[3]
+; CHECK-NEXT:    fmov s0, #9.00000000
+; CHECK-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    bl ldexpf
+; CHECK-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    mov v1.s[3], v0.s[0]
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
+  %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
+  ret <4 x float> %r
+}
+
+define <4 x float> @fdiv_pow2_4xfloat(<4 x i32> %i) {
+; CHECK-LABEL: fdiv_pow2_4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov v1.4s, #9.00000000
+; CHECK-NEXT:    shl v0.4s, v0.4s, #23
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+  %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
+  %p2_f = uitofp <4 x i32> %p2 to <4 x float>
+  %r = fdiv <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
+  ret <4 x float> %r
+}
+
+define double @fmul_pow_shl_cnt(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    fmov d1, #9.00000000
+; CHECK-NEXT:    lsl x8, x8, x0
+; CHECK-NEXT:    ucvtf d0, x8
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
+  %shl = shl nuw i64 1, %cnt
+  %conv = uitofp i64 %shl to double
+  %mul = fmul double 9.000000e+00, %conv
+  ret double %mul
+}
+
+define double @fmul_pow_shl_cnt2(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEXT:    fmov d1, #-9.00000000
+; CHECK-NEXT:    lsl x8, x8, x0
+; CHECK-NEXT:    ucvtf d0, x8
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
+  %shl = shl nuw i64 2, %cnt
+  %conv = uitofp i64 %shl to double
+  %mul = fmul double -9.000000e+00, %conv
+  ret double %mul
+}
+
+define float @fmul_pow_select(i32 %cnt, i1 %c) nounwind {
+; CHECK-LABEL: fmul_pow_select:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    tst w1, #0x1
+; CHECK-NEXT:    fmov s1, #9.00000000
+; CHECK-NEXT:    cinc w8, w8, eq
+; CHECK-NEXT:    lsl w8, w8, w0
+; CHECK-NEXT:    ucvtf s0, w8
+; CHECK-NEXT:    fmul s0, s0, s1
+; CHECK-NEXT:    ret
+  %shl2 = shl nuw i32 2, %cnt
+  %shl1 = shl nuw i32 1, %cnt
+  %shl = select i1 %c, i32 %shl1, i32 %shl2
+  %conv = uitofp i32 %shl to float
+  %mul = fmul float 9.000000e+00, %conv
+  ret float %mul
+}
+
+define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_fly_pow_mul_min_pow2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #8 // =0x8
+; CHECK-NEXT:    mov w9, #8192 // =0x2000
+; CHECK-NEXT:    fmov s1, #9.00000000
+; CHECK-NEXT:    lsl x8, x8, x0
+; CHECK-NEXT:    cmp x8, #2, lsl #12 // =8192
+; CHECK-NEXT:    csel x8, x8, x9, lo
+; CHECK-NEXT:    ucvtf s0, x8
+; CHECK-NEXT:    fmul s0, s0, s1
+; CHECK-NEXT:    ret
+  %shl8 = shl nuw i64 8, %cnt
+  %shl = call i64 @llvm.umin.i64(i64 %shl8, i64 8192)
+  %conv = uitofp i64 %shl to float
+  %mul = fmul float 9.000000e+00, %conv
+  ret float %mul
+}
+
+define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_mul_max_pow2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEXT:    mov w9, #1 // =0x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    fmov d1, #3.00000000
+; CHECK-NEXT:    lsl w8, w8, w0
+; CHECK-NEXT:    lsl w9, w9, w0
+; CHECK-NEXT:    and w8, w8, #0xfffe
+; CHECK-NEXT:    and w9, w9, #0xffff
+; CHECK-NEXT:    cmp w9, w8
+; CHECK-NEXT:    csel w8, w9, w8, hi
+; CHECK-NEXT:    ucvtf d0, w8
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
+  %shl2 = shl nuw i16 2, %cnt
+  %shl1 = shl nuw i16 1, %cnt
+  %shl = call i16 @llvm.umax.i16(i16 %shl1, i16 %shl2)
+  %conv = uitofp i16 %shl to double
+  %mul = fmul double 3.000000e+00, %conv
+  ret double %mul
+}
+
+define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    lsl x8, x0, x1
+; CHECK-NEXT:    fmov d1, #9.00000000
+; CHECK-NEXT:    ucvtf d0, x8
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
+  %shl = shl nuw i64 %v, %cnt
+  %conv = uitofp i64 %shl to double
+  %mul = fmul double 9.000000e+00, %conv
+  ret double %mul
+}
+
+define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    fmov v1.2s, #15.00000000
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    fcvtn v0.2s, v0.2d
+; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x float>
+  %mul = fmul <2 x float> <float 15.000000e+00, float 15.000000e+00>, %conv
+  ret <2 x float> %mul
+}
+
+define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    fmov v1.2d, #15.00000000
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x double>
+  %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
+  ret <2 x double> %mul
+}
+
+define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float> %add) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi v2.4s, #2
+; CHECK-NEXT:    ushl v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    fmov v2.4s, #5.00000000
+; CHECK-NEXT:    ucvtf v0.4s, v0.4s
+; CHECK-NEXT:    fmul v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %shl = shl nsw nuw <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %cnt
+  %conv = uitofp <4 x i32> %shl to <4 x float>
+  %mul = fmul <4 x float> <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>, %conv
+  %res = fadd <4 x float> %mul, %add
+  ret <4 x float> %res
+}
+
+define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #2 // =0x2
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    adrp x8, .LCPI12_0
+; CHECK-NEXT:    ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI12_0]
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x double>
+  %mul = fmul <2 x double> <double 15.000000e+00, double 14.000000e+00>, %conv
+  ret <2 x double> %mul
+}
+
+define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI13_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT:    ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    fmov v1.2d, #15.00000000
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %shl = shl nsw nuw <2 x i64> <i64 2, i64 1>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x double>
+  %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
+  ret <2 x double> %mul
+}
+
+
+define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    lsl x8, x8, x0
+; CHECK-NEXT:    ucvtf d0, x8
+; CHECK-NEXT:    adrp x8, .LCPI14_0
+; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
+  %shl = shl nuw i64 1, %cnt
+  %conv = uitofp i64 %shl to double
+  %mul = fmul double 9.745314e+288, %conv
+  ret double %mul
+}
+
+define double @fmul_pow_shl_cnt_safe(i16 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_safe:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    lsl w8, w8, w0
+; CHECK-NEXT:    and w8, w8, #0xffff
+; CHECK-NEXT:    ucvtf d0, w8
+; CHECK-NEXT:    adrp x8, .LCPI15_0
+; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI15_0]
+; CHECK-NEXT:    fmul d0, d0, d1
+; CHECK-NEXT:    ret
+  %shl = shl nuw i16 1, %cnt
+  %conv = uitofp i16 %shl to double
+  %mul = fmul double 9.745314e+288, %conv
+  ret double %mul
+}
+
+define <2 x double> @fdiv_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_vec:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov v1.2d, #1.00000000
+; CHECK-NEXT:    shl v0.2d, v0.2d, #52
+; CHECK-NEXT:    sub v0.2d, v1.2d, v0.2d
+; CHECK-NEXT:    ret
+  %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x double>
+  %mul = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %conv
+  ret <2 x double> %mul
+}
+
+define <2 x float> @fdiv_pow_shl_cnt_vec_with_expensive_cast(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    fmov v1.2s, #1.00000000
+; CHECK-NEXT:    shl v0.2s, v0.2s, #23
+; CHECK-NEXT:    sub v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    ret
+  %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
+  %conv = uitofp <2 x i64> %shl to <2 x float>
+  %mul = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %conv
+  ret <2 x float> %mul
+}
+
+define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #8 // =0x8
+; CHECK-NEXT:    fmov s1, #-9.00000000
+; CHECK-NEXT:    lsl x8, x8, x0
+; CHECK-NEXT:    ucvtf s0, x8
+; CHECK-NEXT:    fdiv s0, s1, s0
+; CHECK-NEXT:    ret
+  %shl = shl i64 8, %cnt
+  %conv = uitofp i64 %shl to float
+  %mul = fdiv float -9.000000e+00, %conv
+  ret float %mul
+}
+
+define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #8 // =0x8
+; CHECK-NEXT:    fmov s1, #-9.00000000
+; CHECK-NEXT:    lsl x8, x8, x0
+; CHECK-NEXT:    scvtf s0, x8
+; CHECK-NEXT:    fdiv s0, s1, s0
+; CHECK-NEXT:    ret
+  %shl = shl i64 8, %cnt
+  %conv = sitofp i64 %shl to float
+  %mul = fdiv float -9.000000e+00, %conv
+  ret float %mul
+}
+
+define float @fdiv_pow_shl_cnt(i64 %cnt_in) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #8 // =0x8
+; CHECK-NEXT:    and x9, x0, #0x1f
+; CHECK-NEXT:    fmov s1, #-0.50000000
+; CHECK-NEXT:    lsl x8, x8, x9
+; CHECK-NEXT:    scvtf s0, x8
+; CHECK-NEXT:    fdiv s0, s1, s0
+; CHECK-NEXT:    ret
+  %cnt = and i64 %cnt_in, 31
+  %shl = shl i64 8, %cnt
+  %conv = sitofp i64 %shl to float
+  %mul = fdiv float -0.500000e+00, %conv
+  ret float %mul
+}
+
+define double @fdiv_pow_shl_cnt32_to_dbl_okay(i32 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #3936146074321813504 // =0x36a0000000000000
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sub x8, x8, x0, lsl #52
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    ret
+  %shl = shl nuw i32 1, %cnt
+  %conv = uitofp i32 %shl to double
+  %mul = fdiv double 0x36A0000000000000, %conv
+  ret double %mul
+}
+
+define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1 // =0x1
+; CHECK-NEXT:    lsl w8, w8, w0
+; CHECK-NEXT:    ucvtf s0, w8
+; CHECK-NEXT:    mov w8, #65528 // =0xfff8
+; CHECK-NEXT:    movk w8, #4351, lsl #16
+; CHECK-NEXT:    fmov s1, w8
+; CHECK-NEXT:    fdiv s0, s1, s0
+; CHECK-NEXT:    ret
+  %shl = shl nuw i32 1, %cnt
+  %conv = uitofp i32 %shl to float
+  %mul = fdiv float 0x3a1fffff00000000, %conv
+  ret float %mul
+}
+
+define float @fdiv_pow_shl_cnt32_okay(i32 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt32_okay:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #285212672 // =0x11000000
+; CHECK-NEXT:    sub w8, w8, w0, lsl #23
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    ret
+  %shl = shl nuw i32 1, %cnt
+  %conv = uitofp i32 %shl to float
+  %mul = fdiv float 0x3a20000000000000, %conv
+  ret float %mul
+}
+
+define fastcc i1 @quantum_hadamard(i32 %0) {
+; CHECK-LABEL: quantum_hadamard:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT:    sub x8, x8, x0, lsl #52
+; CHECK-NEXT:    fmov d0, x8
+; CHECK-NEXT:    fcvt s0, d0
+; CHECK-NEXT:    fcmp s0, #0.0
+; CHECK-NEXT:    cset w0, gt
+; CHECK-NEXT:    ret
+  %2 = zext i32 %0 to i64
+  %3 = shl i64 1, %2
+  %4 = uitofp i64 %3 to double
+  %5 = fdiv double 1.000000e+00, %4
+  %6 = fptrunc double %5 to float
+  %7 = fcmp olt float 0.000000e+00, %6
+  ret i1 %7
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-NEON: {{.*}}
+; CHECK-NO-NEON: {{.*}}

@davemgreen (Collaborator) left a comment

Thanks for the fix.

@@ -0,0 +1,444 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON

+neon should be the default, so the no-neon run likely needs -mattr=-neon.
That is, if you feel you need to test without neon at all; it might not be necessary?
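
For reference, the suggested no-neon RUN line would presumably look like this (my sketch of the reviewer's suggestion, not a line from the PR):

; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=-neon | FileCheck %s --check-prefixes=CHECK,CHECK-NO-NEON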

Previously, we were taking `CurVT` before finalizing `ToCast`, which
meant potentially returning an `SDValue` with an illegal `ValueType`
for the operation.

The fix is to take `CurVT` only after `ToCast` has been finalized with
`PeekThroughCastsAndTrunc`.
@goldsteinn force-pushed the goldsteinn/fix-take-inexepensive-log2 branch from a1c8ace to 7a776a0 on September 23, 2023
@goldsteinn
Contributor Author

Pushed this with bc38c42; guess I messed up linking the PR <-> commit. But closing.

@goldsteinn goldsteinn closed this Sep 23, 2023