[CodeGen] Fix neutral value of vecreduce fadd in tests (NFC)
The neutral value is -0.0, not 0.0. This doesn't matter for "fast"
reductions due to nsz, but does matter for reassoc-only and seq
reductions.

Change tests to mostly use -0.0 where the neutral value was intended,
and add additional test coverage in some places. Also update the
LangRef to use the correct value.
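
For illustration (not part of this commit, and using hypothetical function
names), the difference is visible in a sequential reduction over
all-negative-zero elements under the default round-to-nearest environment:
+0.0 + (-0.0) rounds to +0.0, whereas -0.0 + x leaves every x unchanged, so
only -0.0 is a true identity for the start value.

declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>)

define float @seq_start_pos_zero(<2 x float> %v) {
  ; Sequential (no fast-math flags): computes (0.0 + %v[0]) + %v[1].
  ; For %v = <float -0.0, float -0.0> this returns +0.0, not -0.0.
  %r = call float @llvm.vector.reduce.fadd.f32.v2f32(float 0.0, <2 x float> %v)
  ret float %r
}

define float @seq_start_neg_zero(<2 x float> %v) {
  ; Computes (-0.0 + %v[0]) + %v[1], which equals %v[0] + %v[1] for every
  ; input, so a -0.0 start value leaves the reduction result untouched.
  %r = call float @llvm.vector.reduce.fadd.f32.v2f32(float -0.0, <2 x float> %v)
  ret float %r
}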
nikic committed Oct 29, 2020
1 parent b22f111 commit fa48ff3
Showing 7 changed files with 943 additions and 351 deletions.
8 changes: 7 additions & 1 deletion llvm/docs/LangRef.rst
@@ -15680,12 +15680,15 @@ The first argument to this intrinsic is a scalar start value for the reduction.
The type of the start value matches the element-type of the vector input.
The second argument must be a vector of floating-point values.

To ignore the start value, negative zero (``-0.0``) can be used, as it is
the neutral value of floating point addition.

Examples:
"""""""""

::

%unord = call reassoc float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> %input) ; relaxed reduction
%unord = call reassoc float @llvm.vector.reduce.fadd.v4f32(float -0.0, <4 x float> %input) ; relaxed reduction
%ord = call float @llvm.vector.reduce.fadd.v4f32(float %start_value, <4 x float> %input) ; sequential reduction


@@ -15751,6 +15754,9 @@ The first argument to this intrinsic is a scalar start value for the reduction.
The type of the start value matches the element-type of the vector input.
The second argument must be a vector of floating-point values.

To ignore the start value, one (``1.0``) can be used, as it is the neutral
value of floating point multiplication.

Examples:
"""""""""

169 changes: 141 additions & 28 deletions llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
@@ -9,93 +9,172 @@ declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)

declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
declare float @llvm.vector.reduce.fadd.f32.v5f32(float, <5 x float>)
declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)

define half @test_v1f16(<1 x half> %a) nounwind {
define half @test_v1f16(<1 x half> %a, half %s) nounwind {
; CHECK-LABEL: test_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvt s0, h0
; CHECK-NEXT: fmov s1, wzr
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: fcvt s1, h1
; CHECK-NEXT: fadd s0, s1, s0
; CHECK-NEXT: fcvt h0, s0
; CHECK-NEXT: ret
%b = call half @llvm.vector.reduce.fadd.f16.v1f16(half 0.0, <1 x half> %a)
%b = call half @llvm.vector.reduce.fadd.f16.v1f16(half %s, <1 x half> %a)
ret half %b
}

define float @test_v1f32(<1 x float> %a) nounwind {
define half @test_v1f16_neutral(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16_neutral:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call half @llvm.vector.reduce.fadd.f16.v1f16(half -0.0, <1 x half> %a)
ret half %b
}

define float @test_v1f32(<1 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v1f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov s1, wzr
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: fadd s0, s1, s0
; CHECK-NEXT: ret
%b = call float @llvm.vector.reduce.fadd.f32.v1f32(float %s, <1 x float> %a)
ret float %b
}

define float @test_v1f32_neutral(<1 x float> %a) nounwind {
; CHECK-LABEL: test_v1f32_neutral:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%b = call float @llvm.vector.reduce.fadd.f32.v1f32(float 0.0, <1 x float> %a)
%b = call float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
ret float %b
}

define double @test_v1f64(<1 x double> %a) nounwind {
define double @test_v1f64(<1 x double> %a, double %s) nounwind {
; CHECK-LABEL: test_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov d1, xzr
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: fadd d0, d1, d0
; CHECK-NEXT: ret
%b = call double @llvm.vector.reduce.fadd.f64.v1f64(double 0.0, <1 x double> %a)
%b = call double @llvm.vector.reduce.fadd.f64.v1f64(double %s, <1 x double> %a)
ret double %b
}

define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
define double @test_v1f64_neutral(<1 x double> %a) nounwind {
; CHECK-LABEL: test_v1f64_neutral:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call double @llvm.vector.reduce.fadd.f64.v1f64(double -0.0, <1 x double> %a)
ret double %b
}

define fp128 @test_v1f128(<1 x fp128> %a, fp128 %s) nounwind {
; CHECK-LABEL: test_v1f128:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: mov v2.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: mov v1.16b, v2.16b
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
%b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 %s, <1 x fp128> %a)
ret fp128 %b
}

define fp128 @test_v1f128_neutral(<1 x fp128> %a) nounwind {
; CHECK-LABEL: test_v1f128_neutral:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 0xL00000000000000008000000000000000, <1 x fp128> %a)
ret fp128 %b
}

define float @test_v3f32(<3 x float> %a) nounwind {
define float @test_v3f32(<3 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v3f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s1, wzr
; CHECK-NEXT: fadd s1, s1, s0
; CHECK-NEXT: mov s2, v0.s[1]
; CHECK-NEXT: fadd s1, s0, s1
; CHECK-NEXT: fadd s1, s1, s2
; CHECK-NEXT: mov s0, v0.s[2]
; CHECK-NEXT: fadd s0, s1, s0
; CHECK-NEXT: ret
%b = call float @llvm.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a)
%b = call float @llvm.vector.reduce.fadd.f32.v3f32(float %s, <3 x float> %a)
ret float %b
}

define float @test_v3f32_neutral(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32_neutral:
; CHECK: // %bb.0:
; CHECK-NEXT: faddp s1, v0.2s
; CHECK-NEXT: mov s0, v0.s[2]
; CHECK-NEXT: fadd s0, s1, s0
; CHECK-NEXT: ret
%b = call float @llvm.vector.reduce.fadd.f32.v3f32(float -0.0, <3 x float> %a)
ret float %b
}

define float @test_v5f32(<5 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v5f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd s0, s5, s0
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: fadd s0, s0, s2
; CHECK-NEXT: fadd s0, s0, s3
; CHECK-NEXT: fadd s0, s0, s4
; CHECK-NEXT: ret
%b = call float @llvm.vector.reduce.fadd.f32.v5f32(float %s, <5 x float> %a)
ret float %b
}

define float @test_v5f32_neutral(<5 x float> %a) nounwind {
; CHECK-LABEL: test_v5f32_neutral:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: fadd s0, s0, s2
; CHECK-NEXT: fadd s0, s0, s3
; CHECK-NEXT: fadd s0, s0, s4
; CHECK-NEXT: ret
%b = call float @llvm.vector.reduce.fadd.f32.v5f32(float -0.0, <5 x float> %a)
ret float %b
}

define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
define fp128 @test_v2f128(<2 x fp128> %a, fp128 %s) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32 // =32
; CHECK-NEXT: adrp x8, .LCPI5_0
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: add sp, sp, #32 // =32
; CHECK-NEXT: ret
%b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
%b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 %s, <2 x fp128> %a)
ret fp128 %b
}

define fp128 @test_v2f128_neutral(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128_neutral:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
ret fp128 %b
}

define float @test_v16f32(<16 x float> %a) nounwind {
define float @test_v16f32(<16 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v16f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s4, wzr
; CHECK-NEXT: fadd s4, s4, s0
; CHECK-NEXT: mov s5, v0.s[1]
; CHECK-NEXT: fadd s4, s0, s4
; CHECK-NEXT: fadd s4, s4, s5
; CHECK-NEXT: mov s5, v0.s[2]
; CHECK-NEXT: mov s0, v0.s[3]
@@ -123,6 +202,40 @@ define float @test_v16f32(<16 x float> %a) nounwind {
; CHECK-NEXT: mov s1, v3.s[3]
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
%b = call float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a)
%b = call float @llvm.vector.reduce.fadd.f32.v16f32(float %s, <16 x float> %a)
ret float %b
}

define float @test_v16f32_neutral(<16 x float> %a) nounwind {
; CHECK-LABEL: test_v16f32_neutral:
; CHECK: // %bb.0:
; CHECK-NEXT: faddp s4, v0.2s
; CHECK-NEXT: mov s5, v0.s[2]
; CHECK-NEXT: mov s0, v0.s[3]
; CHECK-NEXT: fadd s4, s4, s5
; CHECK-NEXT: fadd s0, s4, s0
; CHECK-NEXT: mov s5, v1.s[1]
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: mov s4, v1.s[2]
; CHECK-NEXT: fadd s0, s0, s5
; CHECK-NEXT: mov s1, v1.s[3]
; CHECK-NEXT: fadd s0, s0, s4
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: mov s5, v2.s[1]
; CHECK-NEXT: fadd s0, s0, s2
; CHECK-NEXT: mov s4, v2.s[2]
; CHECK-NEXT: fadd s0, s0, s5
; CHECK-NEXT: mov s1, v2.s[3]
; CHECK-NEXT: fadd s0, s0, s4
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: mov s2, v3.s[1]
; CHECK-NEXT: fadd s0, s0, s3
; CHECK-NEXT: mov s5, v3.s[2]
; CHECK-NEXT: fadd s0, s0, s2
; CHECK-NEXT: fadd s0, s0, s5
; CHECK-NEXT: mov s1, v3.s[3]
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
%b = call float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a)
ret float %b
}
37 changes: 30 additions & 7 deletions llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll
@@ -7,14 +7,15 @@ declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)

declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
declare float @llvm.vector.reduce.fadd.f32.v5f32(float, <5 x float>)
declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)

define half @test_v1f16(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call fast nnan half @llvm.vector.reduce.fadd.f16.v1f16(half 0.0, <1 x half> %a)
%b = call reassoc half @llvm.vector.reduce.fadd.f16.v1f16(half -0.0, <1 x half> %a)
ret half %b
}

@@ -24,23 +25,23 @@ define float @test_v1f32(<1 x float> %a) nounwind {
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%b = call fast nnan float @llvm.vector.reduce.fadd.f32.v1f32(float 0.0, <1 x float> %a)
%b = call reassoc float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
ret float %b
}

define double @test_v1f64(<1 x double> %a) nounwind {
; CHECK-LABEL: test_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call fast nnan double @llvm.vector.reduce.fadd.f64.v1f64(double 0.0, <1 x double> %a)
%b = call reassoc double @llvm.vector.reduce.fadd.f64.v1f64(double -0.0, <1 x double> %a)
ret double %b
}

define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
; CHECK-LABEL: test_v1f128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
%b = call fast nnan fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 zeroinitializer, <1 x fp128> %a)
%b = call reassoc fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 0xL00000000000000008000000000000000, <1 x fp128> %a)
ret fp128 %b
}

@@ -52,7 +53,29 @@ define float @test_v3f32(<3 x float> %a) nounwind {
; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
%b = call fast nnan float @llvm.vector.reduce.fadd.f32.v3f32(float 0.0, <3 x float> %a)
%b = call reassoc float @llvm.vector.reduce.fadd.f32.v3f32(float -0.0, <3 x float> %a)
ret float %b
}

define float @test_v5f32(<5 x float> %a) nounwind {
; CHECK-LABEL: test_v5f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2
; CHECK-NEXT: movi v5.2d, #0000000000000000
; CHECK-NEXT: mov v0.s[1], v1.s[0]
; CHECK-NEXT: mov v0.s[2], v2.s[0]
; CHECK-NEXT: // kill: def $s4 killed $s4 def $q4
; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3
; CHECK-NEXT: mov v0.s[3], v3.s[0]
; CHECK-NEXT: mov v5.s[0], v4.s[0]
; CHECK-NEXT: fadd v0.4s, v0.4s, v5.4s
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
%b = call reassoc float @llvm.vector.reduce.fadd.f32.v5f32(float -0.0, <5 x float> %a)
ret float %b
}

@@ -63,7 +86,7 @@ define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-NEXT: bl __addtf3
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%b = call fast nnan fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 zeroinitializer, <2 x fp128> %a)
%b = call reassoc fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
ret fp128 %b
}

@@ -77,6 +100,6 @@ define float @test_v16f32(<16 x float> %a) nounwind {
; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
; CHECK-NEXT: faddp s0, v0.2s
; CHECK-NEXT: ret
%b = call fast nnan float @llvm.vector.reduce.fadd.f32.v16f32(float 0.0, <16 x float> %a)
%b = call reassoc float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a)
ret float %b
}
