-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[DAGCombiner][AArch64] Fix incorrect cast VT in takeInexpensiveLog2
#67161
[DAGCombiner][AArch64] Fix incorrect cast VT in takeInexpensiveLog2
#67161
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-llvm-selectiondag Changes: Previously, we were taking `CurVT` before finalizing `ToCast`. Fix is to just take `CurVT` after finalizing `ToCast` with `PeekThroughCastsAndTrunc`. Full diff: https://github.com/llvm/llvm-project/pull/67161.diff 2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 693523e737acf66..7d5dc96bd0e2c94 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -27323,8 +27323,8 @@ static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
return SDValue();
auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
- EVT CurVT = ToCast.getValueType();
ToCast = PeekThroughCastsAndTrunc(ToCast);
+ EVT CurVT = ToCast.getValueType();
if (NewVT == CurVT)
return ToCast;
diff --git a/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
new file mode 100644
index 000000000000000..b433e61cd18732e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -0,0 +1,444 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON
+; RUN: llc < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NO-NEON
+
+declare i16 @llvm.umax.i16(i16, i16)
+declare i64 @llvm.umin.i64(i64, i64)
+
+declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)
+
+define <4 x float> @fmul_pow2_4xfloat(<4 x i32> %i) {
+; CHECK-LABEL: fmul_pow2_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: ushl v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: fmov v1.4s, #9.00000000
+; CHECK-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
+ %p2_f = uitofp <4 x i32> %p2 to <4 x float>
+ %r = fmul <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
+ ret <4 x float> %r
+}
+
+define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
+; CHECK-LABEL: fmul_pow2_ldexp_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov w0, v0.s[1]
+; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: fmov s0, #9.00000000
+; CHECK-NEXT: bl ldexpf
+; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: fmov s0, #9.00000000
+; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: bl ldexpf
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov w0, v0.s[2]
+; CHECK-NEXT: fmov s0, #9.00000000
+; CHECK-NEXT: bl ldexpf
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: mov v1.s[2], v0.s[0]
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov w0, v0.s[3]
+; CHECK-NEXT: fmov s0, #9.00000000
+; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: bl ldexpf
+; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: mov v1.s[3], v0.s[0]
+; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+ %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
+ ret <4 x float> %r
+}
+
+define <4 x float> @fdiv_pow2_4xfloat(<4 x i32> %i) {
+; CHECK-LABEL: fdiv_pow2_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v1.4s, #9.00000000
+; CHECK-NEXT: shl v0.4s, v0.4s, #23
+; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+ %p2 = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %i
+ %p2_f = uitofp <4 x i32> %p2 to <4 x float>
+ %r = fdiv <4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, %p2_f
+ ret <4 x float> %r
+}
+
+define double @fmul_pow_shl_cnt(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: fmov d1, #9.00000000
+; CHECK-NEXT: lsl x8, x8, x0
+; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %shl = shl nuw i64 1, %cnt
+ %conv = uitofp i64 %shl to double
+ %mul = fmul double 9.000000e+00, %conv
+ ret double %mul
+}
+
+define double @fmul_pow_shl_cnt2(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: fmov d1, #-9.00000000
+; CHECK-NEXT: lsl x8, x8, x0
+; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %shl = shl nuw i64 2, %cnt
+ %conv = uitofp i64 %shl to double
+ %mul = fmul double -9.000000e+00, %conv
+ ret double %mul
+}
+
+define float @fmul_pow_select(i32 %cnt, i1 %c) nounwind {
+; CHECK-LABEL: fmul_pow_select:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: tst w1, #0x1
+; CHECK-NEXT: fmov s1, #9.00000000
+; CHECK-NEXT: cinc w8, w8, eq
+; CHECK-NEXT: lsl w8, w8, w0
+; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: fmul s0, s0, s1
+; CHECK-NEXT: ret
+ %shl2 = shl nuw i32 2, %cnt
+ %shl1 = shl nuw i32 1, %cnt
+ %shl = select i1 %c, i32 %shl1, i32 %shl2
+ %conv = uitofp i32 %shl to float
+ %mul = fmul float 9.000000e+00, %conv
+ ret float %mul
+}
+
+define float @fmul_fly_pow_mul_min_pow2(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_fly_pow_mul_min_pow2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #8 // =0x8
+; CHECK-NEXT: mov w9, #8192 // =0x2000
+; CHECK-NEXT: fmov s1, #9.00000000
+; CHECK-NEXT: lsl x8, x8, x0
+; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192
+; CHECK-NEXT: csel x8, x8, x9, lo
+; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: fmul s0, s0, s1
+; CHECK-NEXT: ret
+ %shl8 = shl nuw i64 8, %cnt
+ %shl = call i64 @llvm.umin.i64(i64 %shl8, i64 8192)
+ %conv = uitofp i64 %shl to float
+ %mul = fmul float 9.000000e+00, %conv
+ ret float %mul
+}
+
+define double @fmul_pow_mul_max_pow2(i16 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_mul_max_pow2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: mov w9, #1 // =0x1
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: fmov d1, #3.00000000
+; CHECK-NEXT: lsl w8, w8, w0
+; CHECK-NEXT: lsl w9, w9, w0
+; CHECK-NEXT: and w8, w8, #0xfffe
+; CHECK-NEXT: and w9, w9, #0xffff
+; CHECK-NEXT: cmp w9, w8
+; CHECK-NEXT: csel w8, w9, w8, hi
+; CHECK-NEXT: ucvtf d0, w8
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %shl2 = shl nuw i16 2, %cnt
+ %shl1 = shl nuw i16 1, %cnt
+ %shl = call i16 @llvm.umax.i16(i16 %shl1, i16 %shl2)
+ %conv = uitofp i16 %shl to double
+ %mul = fmul double 3.000000e+00, %conv
+ ret double %mul
+}
+
+define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: lsl x8, x0, x1
+; CHECK-NEXT: fmov d1, #9.00000000
+; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %shl = shl nuw i64 %v, %cnt
+ %conv = uitofp i64 %shl to double
+ %mul = fmul double 9.000000e+00, %conv
+ ret double %mul
+}
+
+define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_fail_expensive_cast:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: fmov v1.2s, #15.00000000
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: fcvtn v0.2s, v0.2d
+; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-NEXT: ret
+ %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+ %conv = uitofp <2 x i64> %shl to <2 x float>
+ %mul = fmul <2 x float> <float 15.000000e+00, float 15.000000e+00>, %conv
+ ret <2 x float> %mul
+}
+
+define <2 x double> @fmul_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: fmov v1.2d, #15.00000000
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+ %conv = uitofp <2 x i64> %shl to <2 x double>
+ %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
+ ret <2 x double> %mul
+}
+
+define <4 x float> @fmul_pow_shl_cnt_vec_preserve_fma(<4 x i32> %cnt, <4 x float> %add) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_preserve_fma:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v2.4s, #2
+; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: fmov v2.4s, #5.00000000
+; CHECK-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %shl = shl nsw nuw <4 x i32> <i32 2, i32 2, i32 2, i32 2>, %cnt
+ %conv = uitofp <4 x i32> %shl to <4 x float>
+ %mul = fmul <4 x float> <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00>, %conv
+ %res = fadd <4 x float> %mul, %add
+ ret <4 x float> %res
+}
+
+define <2 x double> @fmul_pow_shl_cnt_vec_non_splat_todo(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_non_splat_todo:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2 // =0x2
+; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: adrp x8, .LCPI12_0
+; CHECK-NEXT: ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %shl = shl nsw nuw <2 x i64> <i64 2, i64 2>, %cnt
+ %conv = uitofp <2 x i64> %shl to <2 x double>
+ %mul = fmul <2 x double> <double 15.000000e+00, double 14.000000e+00>, %conv
+ ret <2 x double> %mul
+}
+
+define <2 x double> @fmul_pow_shl_cnt_vec_non_splat2_todo(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_vec_non_splat2_todo:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI13_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT: ushl v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: fmov v1.2d, #15.00000000
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %shl = shl nsw nuw <2 x i64> <i64 2, i64 1>, %cnt
+ %conv = uitofp <2 x i64> %shl to <2 x double>
+ %mul = fmul <2 x double> <double 15.000000e+00, double 15.000000e+00>, %conv
+ ret <2 x double> %mul
+}
+
+
+define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_fail_maybe_bad_exp:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: lsl x8, x8, x0
+; CHECK-NEXT: ucvtf d0, x8
+; CHECK-NEXT: adrp x8, .LCPI14_0
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %shl = shl nuw i64 1, %cnt
+ %conv = uitofp i64 %shl to double
+ %mul = fmul double 9.745314e+288, %conv
+ ret double %mul
+}
+
+define double @fmul_pow_shl_cnt_safe(i16 %cnt) nounwind {
+; CHECK-LABEL: fmul_pow_shl_cnt_safe:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: lsl w8, w8, w0
+; CHECK-NEXT: and w8, w8, #0xffff
+; CHECK-NEXT: ucvtf d0, w8
+; CHECK-NEXT: adrp x8, .LCPI15_0
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI15_0]
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %shl = shl nuw i16 1, %cnt
+ %conv = uitofp i16 %shl to double
+ %mul = fmul double 9.745314e+288, %conv
+ ret double %mul
+}
+
+define <2 x double> @fdiv_pow_shl_cnt_vec(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_vec:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v1.2d, #1.00000000
+; CHECK-NEXT: shl v0.2d, v0.2d, #52
+; CHECK-NEXT: sub v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: ret
+ %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
+ %conv = uitofp <2 x i64> %shl to <2 x double>
+ %mul = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %conv
+ ret <2 x double> %mul
+}
+
+define <2 x float> @fdiv_pow_shl_cnt_vec_with_expensive_cast(<2 x i64> %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_vec_with_expensive_cast:
+; CHECK: // %bb.0:
+; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: fmov v1.2s, #1.00000000
+; CHECK-NEXT: shl v0.2s, v0.2s, #23
+; CHECK-NEXT: sub v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: ret
+ %shl = shl nuw <2 x i64> <i64 1, i64 1>, %cnt
+ %conv = uitofp <2 x i64> %shl to <2 x float>
+ %mul = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %conv
+ ret <2 x float> %mul
+}
+
+define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_fail_maybe_z:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #8 // =0x8
+; CHECK-NEXT: fmov s1, #-9.00000000
+; CHECK-NEXT: lsl x8, x8, x0
+; CHECK-NEXT: ucvtf s0, x8
+; CHECK-NEXT: fdiv s0, s1, s0
+; CHECK-NEXT: ret
+ %shl = shl i64 8, %cnt
+ %conv = uitofp i64 %shl to float
+ %mul = fdiv float -9.000000e+00, %conv
+ ret float %mul
+}
+
+define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt_fail_neg_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #8 // =0x8
+; CHECK-NEXT: fmov s1, #-9.00000000
+; CHECK-NEXT: lsl x8, x8, x0
+; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: fdiv s0, s1, s0
+; CHECK-NEXT: ret
+ %shl = shl i64 8, %cnt
+ %conv = sitofp i64 %shl to float
+ %mul = fdiv float -9.000000e+00, %conv
+ ret float %mul
+}
+
+define float @fdiv_pow_shl_cnt(i64 %cnt_in) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #8 // =0x8
+; CHECK-NEXT: and x9, x0, #0x1f
+; CHECK-NEXT: fmov s1, #-0.50000000
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: scvtf s0, x8
+; CHECK-NEXT: fdiv s0, s1, s0
+; CHECK-NEXT: ret
+ %cnt = and i64 %cnt_in, 31
+ %shl = shl i64 8, %cnt
+ %conv = sitofp i64 %shl to float
+ %mul = fdiv float -0.500000e+00, %conv
+ ret float %mul
+}
+
+define double @fdiv_pow_shl_cnt32_to_dbl_okay(i32 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt32_to_dbl_okay:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #3936146074321813504 // =0x36a0000000000000
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sub x8, x8, x0, lsl #52
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ret
+ %shl = shl nuw i32 1, %cnt
+ %conv = uitofp i32 %shl to double
+ %mul = fdiv double 0x36A0000000000000, %conv
+ ret double %mul
+}
+
+define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt32_out_of_bounds2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: lsl w8, w8, w0
+; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: mov w8, #65528 // =0xfff8
+; CHECK-NEXT: movk w8, #4351, lsl #16
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: fdiv s0, s1, s0
+; CHECK-NEXT: ret
+ %shl = shl nuw i32 1, %cnt
+ %conv = uitofp i32 %shl to float
+ %mul = fdiv float 0x3a1fffff00000000, %conv
+ ret float %mul
+}
+
+define float @fdiv_pow_shl_cnt32_okay(i32 %cnt) nounwind {
+; CHECK-LABEL: fdiv_pow_shl_cnt32_okay:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #285212672 // =0x11000000
+; CHECK-NEXT: sub w8, w8, w0, lsl #23
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ret
+ %shl = shl nuw i32 1, %cnt
+ %conv = uitofp i32 %shl to float
+ %mul = fdiv float 0x3a20000000000000, %conv
+ ret float %mul
+}
+
+define fastcc i1 @quantum_hadamard(i32 %0) {
+; CHECK-LABEL: quantum_hadamard:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: sub x8, x8, x0, lsl #52
+; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcvt s0, d0
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: cset w0, gt
+; CHECK-NEXT: ret
+ %2 = zext i32 %0 to i64
+ %3 = shl i64 1, %2
+ %4 = uitofp i64 %3 to double
+ %5 = fdiv double 1.000000e+00, %4
+ %6 = fptrunc double %5 to float
+ %7 = fcmp olt float 0.000000e+00, %6
+ ret i1 %7
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-NEON: {{.*}}
+; CHECK-NO-NEON: {{.*}}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the fix.
@@ -0,0 +1,444 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | |||
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+neon should be the default, so for no-neon it likely needs -mattr=-neon.
That is, if you feel you need to test without neon at all — it might not be necessary?
Previously, we were taking `CurVT` before finalizing `ToCast`, which meant potentially returning an `SDValue` with an illegal `ValueType` for the operation. Fix is to just take `CurVT` after we have finalized `ToCast` with `PeekThroughCastsAndTrunc`.
a1c8ace
to
7a776a0
Compare
Pushed this with: bc38c42 guess messed up linking the PR <-> commit. But closing. |
Previously, we were taking `CurVT` before finalizing `ToCast`, which meant potentially returning an `SDValue` with an illegal `ValueType` for the operation.
Fix is to just take `CurVT` after we have finalized `ToCast` with `PeekThroughCastsAndTrunc`.