From d09de888f4a8f58bf6f6c8e1fec5aa7317190d9a Mon Sep 17 00:00:00 2001
From: Victor Guerra
Date: Wed, 30 Oct 2019 12:49:03 +0100
Subject: [PATCH 1/6] [AutoDiff] Defines remaining derivatives for tgmath
 functions.

The following math functions are now differentiable:

* `remainder`
* `fmod`
* `ceil`
* `floor`
* `round`
* `trunc`

This PR also uses the `@differentiating` attribute instead of
`@differentiable` for derivative registration.

NOTE: For the time being, this exposes a compiler crash that may or may not
be related to [TF-429](https://bugs.swift.org/browse/TF-429).

Resolves [TF-812](https://bugs.swift.org/browse/TF-812)
---
 stdlib/public/Platform/tgmath.swift.gyb | 69 ++++++++++++++++++++-----
 test/stdlib/tgmath.swift.gyb            | 10 ++++
 2 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/stdlib/public/Platform/tgmath.swift.gyb b/stdlib/public/Platform/tgmath.swift.gyb
index 0d1526865353d..2b22a28824764 100644
--- a/stdlib/public/Platform/tgmath.swift.gyb
+++ b/stdlib/public/Platform/tgmath.swift.gyb
@@ -20,22 +20,11 @@ public func fabs(_ x: T) -> T {
 }
 
 @_transparent
-// SWIFT_ENABLE_TENSORFLOW
-@differentiable(
-  vjp: _vjpSqrt
-  where T : Differentiable & FloatingPoint, T == T.TangentVector
-)
 public func sqrt(_ x: T) -> T {
   return x.squareRoot()
 }
 
 @_transparent
-// SWIFT_ENABLE_TENSORFLOW
-@differentiable(
-  wrt: (x, y, z),
-  vjp: _vjpFma
-  where T : Differentiable & FloatingPoint, T == T.TangentVector
-)
 public func fma(_ x: T, _ y: T, _ z: T) -> T {
   return z.addingProduct(x, y)
 }
@@ -95,22 +84,75 @@ public func frexp(_ x: T) -> (T, Int) {
 
 // SWIFT_ENABLE_TENSORFLOW
 @usableFromInline
+@differentiating(sqrt)
 func _vjpSqrt (
   _ x: T
-) -> (T, (T) -> T) where T == T.TangentVector {
+) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
   let value = x.squareRoot()
   return (value, { v in v / (2 * value) })
 }
 
 @usableFromInline
+@differentiating(fma)
 func _vjpFma (
   _ x: T,
   _ y: T,
   _ z: T
-) -> (T, (T) -> (T, T, T)) where T == T.TangentVector {
+) -> (value: T, pullback: (T) -> (T, T, T)) where T == T.TangentVector {
   return (fma(x, y, z), { v in (v * y, v * x, v) })
 }
+@usableFromInline
+@differentiating(remainder)
+func _vjpRemainder (
+  _ x: T,
+  _ y: T
+) -> (value: T, pullback: (T) -> (T, T)) where T == T.TangentVector {
+  return (remainder(x, y), { v in (v, v) })
+}
+
+@usableFromInline
+@differentiating(fmod)
+func _vjpFmod (
+  _ x: T,
+  _ y: T
+) -> (value: T, pullback: (T) -> (T, T)) where T == T.TangentVector {
+  return (fmod(x, y), { v in (v, v) })
+}
+
+@usableFromInline
+@differentiating(ceil)
+func _vjpCeil (
+  _ x: T
+) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+  return (ceil(x), { v in 0 })
+}
+
+@usableFromInline
+@differentiating(floor)
+func _vjpFloor (
+  _ x: T
+) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+  return (floor(x), { v in 0 })
+}
+
+@usableFromInline
+@differentiating(round)
+func _vjpRound (
+  _ x: T
+) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+  return (round(x), { v in 0 })
+}
+
+@usableFromInline
+@differentiating(trunc)
+func _vjpTrunc (
+  _ x: T
+) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+  return (trunc(x), { v in 0 })
+}
+// SWIFT_ENABLE_TENSORFLOW END
+
 %for T in ['Float','Double']:
 @available(swift, deprecated: 4.2, renamed: "scalbn")
 @_transparent
@@ -233,6 +275,7 @@ func _vjpErf(_ x: ${T}) -> (${T}, (${T}) -> ${T}) {
 func _vjpErfc(_ x: ${T}) -> (${T}, (${T}) -> ${T}) {
   return (erfc(x), { v in v * -${T}(M_2_SQRTPI) * exp(-x * x)
   })
 }
+// SWIFT_ENABLE_TENSORFLOW END
 % if T == 'Float80':
 #endif
 % end

diff --git a/test/stdlib/tgmath.swift.gyb b/test/stdlib/tgmath.swift.gyb
index 65341df93ab14..f2eea08eecc74 100644
--- a/test/stdlib/tgmath.swift.gyb
+++ b/test/stdlib/tgmath.swift.gyb
@@ -273,6 +273,16 @@ MathTests.test("gradient_${T}") {
   expectEqualWithTolerance(5.0, fmaGrad.0, ulps: 16)
   expectEqualWithTolerance(4.0, fmaGrad.1, ulps: 16)
   expectEqualWithTolerance(1.0, fmaGrad.2, ulps: 16)
+  let remainderGrad = gradient(at: 4.0 as ${T}, 5.0 as ${T}, in: { x, y in remainder(x, y) })
+  expectEqualWithTolerance(1.0, remainderGrad.0, ulps: 16)
+  expectEqualWithTolerance(1.0, remainderGrad.1, ulps: 16)
+  let fmodGrad = gradient(at: 4.0 as ${T}, 5.0 as ${T}, in: { x, y in fmod(x, y) })
+  expectEqualWithTolerance(1.0, fmodGrad.0, ulps: 16)
+  expectEqualWithTolerance(1.0, fmodGrad.1, ulps: 16)
+  expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { ceil($0) }), ulps: 16)
+  expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { floor($0) }), ulps: 16)
+  expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { round($0) }), ulps: 16)
+  expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { trunc($0) }), ulps: 16)
 }
 
 %end

From 6a0dc664a6a8a325508b6d3d64c744e77df9e7e1 Mon Sep 17 00:00:00 2001
From: Dan Zheng
Date: Wed, 6 Nov 2019 18:03:44 -0800
Subject: [PATCH 2/6] Use `@differentiating` with `pullback:` label.

Change functions declared with the `@differentiating` attribute to return a
tuple with the `pullback:` label instead of the `differential:` label.

The `pullback:` label indicates that the `@differentiating` function is a
reverse-mode derivative function (VJP), not a forward-mode derivative
function (JVP).

Eventually, when `@differentiable(linear)` functions and transposition are
fully implemented, the `@differentiating` attribute may be changed to
register only differential-returning derivative functions.
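For illustration, here is a rough sketch of the two result shapes, using a
made-up `square` function. Only the attribute and the `pullback:` /
`differential:` result labels come from this change; the function and
derivative names below are illustrative, not part of the patch.

```swift
func square(_ x: Float) -> Float { x * x }

// Reverse-mode derivative (VJP): the second tuple element is a pullback that
// maps a cotangent of the result back to a cotangent of the argument.
@differentiating(square)
func _vjpSquare(_ x: Float) -> (value: Float, pullback: (Float) -> Float) {
  return (x * x, { v in 2 * x * v })
}

// Forward-mode derivative (JVP): the second tuple element is a differential
// that maps a tangent of the argument to a tangent of the result.
@differentiating(square)
func _jvpSquare(_ x: Float) -> (value: Float, differential: (Float) -> Float) {
  return (x * x, { dx in 2 * x * dx })
}
```

With this change, the derivative functions registered in tgmath.swift.gyb all
use the `pullback:` form.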
---
 stdlib/public/Platform/tgmath.swift.gyb | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/stdlib/public/Platform/tgmath.swift.gyb b/stdlib/public/Platform/tgmath.swift.gyb
index 2b22a28824764..02c7cce5fb0c3 100644
--- a/stdlib/public/Platform/tgmath.swift.gyb
+++ b/stdlib/public/Platform/tgmath.swift.gyb
@@ -87,7 +87,7 @@ public func frexp(_ x: T) -> (T, Int) {
 @differentiating(sqrt)
 func _vjpSqrt (
   _ x: T
-) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+) -> (value: T, pullback: (T) -> T) where T == T.TangentVector {
   let value = x.squareRoot()
   return (value, { v in v / (2 * value) })
 }
@@ -124,7 +124,7 @@ func _vjpFmod (
 @differentiating(ceil)
 func _vjpCeil (
   _ x: T
-) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+) -> (value: T, pullback: (T) -> T) where T == T.TangentVector {
   return (ceil(x), { v in 0 })
 }
@@ -132,7 +132,7 @@ func _vjpCeil (
 @differentiating(floor)
 func _vjpFloor (
   _ x: T
-) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+) -> (value: T, pullback: (T) -> T) where T == T.TangentVector {
   return (floor(x), { v in 0 })
 }
@@ -140,7 +140,7 @@ func _vjpFloor (
 @differentiating(round)
 func _vjpRound (
   _ x: T
-) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+) -> (value: T, pullback: (T) -> T) where T == T.TangentVector {
   return (round(x), { v in 0 })
 }
@@ -148,7 +148,7 @@ func _vjpRound (
 @differentiating(trunc)
 func _vjpTrunc (
   _ x: T
-) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+) -> (value: T, pullback: (T) -> T) where T == T.TangentVector {
   return (trunc(x), { v in 0 })
 }
 // SWIFT_ENABLE_TENSORFLOW END

From ef48b6ab6b341a6e5df84b30d75e7579999d837d Mon Sep 17 00:00:00 2001
From: Victor Guerra
Date: Thu, 7 Nov 2019 16:09:04 +0100
Subject: [PATCH 3/6] Sqrt's VJP uses `sqrt` to compute value

---
 stdlib/public/Platform/tgmath.swift.gyb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stdlib/public/Platform/tgmath.swift.gyb b/stdlib/public/Platform/tgmath.swift.gyb
index 02c7cce5fb0c3..037cff1a84de2 100644
--- a/stdlib/public/Platform/tgmath.swift.gyb
+++ b/stdlib/public/Platform/tgmath.swift.gyb
@@ -88,7 +88,7 @@ public func frexp(_ x: T) -> (T, Int) {
 func _vjpSqrt (
   _ x: T
 ) -> (value: T, pullback: (T) -> T) where T == T.TangentVector {
-  let value = x.squareRoot()
+  let value = sqrt(x)
   return (value, { v in v / (2 * value) })
 }

From a30582e003c9f897d1eb7c299c38f2a320afb93e Mon Sep 17 00:00:00 2001
From: Victor Guerra
Date: Wed, 13 Nov 2019 13:36:46 +0100
Subject: [PATCH 4/6] Fixing derivatives of `remainder` and `fmod` w.r.t. y
 variable.

Also, extend the test strategy to double-check the derivatives.
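For intuition: away from the points where the rounded quotient jumps,
remainder(x, y) = x - y * (x / y).rounded(.toNearestOrEven) and
fmod(x, y) = x - y * (x / y).rounded(.towardZero), so the partial with respect
to x is 1 and the partial with respect to y is minus the rounded quotient.
A small standalone check in plain Swift with Foundation (independent of this
patch) comparing those closed forms against one-sided finite differences:

```swift
import Foundation

let (x, y) = (4.0, 5.0)
let eps = 1e-6

// d/dy remainder(x, y) should match -(x / y).rounded(.toNearestOrEven).
let dRemainderDy = (remainder(x, y + eps) - remainder(x, y)) / eps
print(dRemainderDy, -(x / y).rounded(.toNearestOrEven))  // both ≈ -1

// d/dy fmod(x, y) should match -(x / y).rounded(.towardZero).
let dFmodDy = (fmod(x, y + eps) - fmod(x, y)) / eps
print(dFmodDy, -(x / y).rounded(.towardZero))            // both ≈ 0
```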
---
 stdlib/public/Platform/tgmath.swift.gyb |  4 ++--
 test/stdlib/tgmath.swift.gyb            | 29 ++++++++++++++++++++-----
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/stdlib/public/Platform/tgmath.swift.gyb b/stdlib/public/Platform/tgmath.swift.gyb
index 037cff1a84de2..f2e4c1eace2a6 100644
--- a/stdlib/public/Platform/tgmath.swift.gyb
+++ b/stdlib/public/Platform/tgmath.swift.gyb
@@ -108,7 +108,7 @@ func _vjpRemainder (
   _ x: T,
   _ y: T
 ) -> (value: T, pullback: (T) -> (T, T)) where T == T.TangentVector {
-  return (remainder(x, y), { v in (v, v) })
+  return (remainder(x, y), { v in (v, -v * ((x / y).rounded(.toNearestOrEven))) })
 }
 
 @usableFromInline
@@ -117,7 +117,7 @@ func _vjpFmod (
   _ x: T,
   _ y: T
 ) -> (value: T, pullback: (T) -> (T, T)) where T == T.TangentVector {
-  return (fmod(x, y), { v in (v, v) })
+  return (fmod(x, y), { v in (v, -v * ((x / y).rounded(.towardZero))) })
 }
 
 @usableFromInline

diff --git a/test/stdlib/tgmath.swift.gyb b/test/stdlib/tgmath.swift.gyb
index f2eea08eecc74..490a4100e3759 100644
--- a/test/stdlib/tgmath.swift.gyb
+++ b/test/stdlib/tgmath.swift.gyb
@@ -53,6 +53,18 @@ func expectEqualWithTolerance(_ expected: TestLiteralType, _ actual: T,
     file: file, line: line)
 }
 
+func checkGradient(
+  _ f: @differentiable (T, T) -> T,
+  _ x: T,
+  _ y: T) where T == T.TangentVector {
+  let eps = T(0.001)
+  let grad = gradient(at: x, y, in: f)
+  let dfdx = (f(x + eps, y) - f(x, y)) / eps
+  let dfdy = (f(x, y + eps) - f(x, y)) / eps
+  expectEqualWithTolerance(TestLiteralType(dfdx.rounded(.toNearestOrAwayFromZero)), grad.0)
+  expectEqualWithTolerance(TestLiteralType(dfdy.rounded(.toNearestOrAwayFromZero)), grad.1)
+}
+
 %{
 unary = [
   'acos', 'asin', 'atan',
@@ -273,16 +285,21 @@ MathTests.test("gradient_${T}") {
   expectEqualWithTolerance(5.0, fmaGrad.0, ulps: 16)
   expectEqualWithTolerance(4.0, fmaGrad.1, ulps: 16)
   expectEqualWithTolerance(1.0, fmaGrad.2, ulps: 16)
-  let remainderGrad = gradient(at: 4.0 as ${T}, 5.0 as ${T}, in: { x, y in remainder(x, y) })
-  expectEqualWithTolerance(1.0, remainderGrad.0, ulps: 16)
-  expectEqualWithTolerance(1.0, remainderGrad.1, ulps: 16)
-  let fmodGrad = gradient(at: 4.0 as ${T}, 5.0 as ${T}, in: { x, y in fmod(x, y) })
-  expectEqualWithTolerance(1.0, fmodGrad.0, ulps: 16)
-  expectEqualWithTolerance(1.0, fmodGrad.1, ulps: 16)
   expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { ceil($0) }), ulps: 16)
   expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { floor($0) }), ulps: 16)
   expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { round($0) }), ulps: 16)
   expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { trunc($0) }), ulps: 16)
+  for a in -10...10 {
+    let x = ${T}(a)
+    for b in -10...10 {
+      let y = ${T}(b)
+      guard b != 0 && remainder(x, y).sign == remainder(x + ${T}(0.001), y).sign &&
+        remainder(x, y).sign == remainder(x, y + ${T}(0.001)).sign
+      else { continue }
+      checkGradient({ remainder($0, $1) }, x, y)
+      checkGradient({ fmod($0, $1) }, x, y)
+    }
+  }
 }
 
 %end

From 4ca8cc5010b947197408cb9588dd10c9019fc7ca Mon Sep 17 00:00:00 2001
From: Victor Guerra
Date: Thu, 14 Nov 2019 23:50:44 +0100
Subject: [PATCH 5/6] Avoid rounding of expected derivative values in tests.

Also address formatting remarks.
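For context on why the raw finite-difference value is compared against the
analytic gradient with a ulps tolerance rather than rounded first: a forward
difference carries truncation error on the order of the step size, so it is
close to, but not exactly equal to, the true derivative. A self-contained
sketch in plain Swift (not part of the test file; `forwardDiff` is a made-up
helper):

```swift
// Exact derivative of sqrt at x = 4 is 0.25; the forward-difference estimate
// is slightly off, and the gap shrinks roughly linearly as eps shrinks.
func forwardDiff(_ f: (Double) -> Double, at x: Double, eps: Double) -> Double {
  return (f(x + eps) - f(x)) / eps
}

for eps in [0.1, 0.01, 0.001] {
  let estimate = forwardDiff({ $0.squareRoot() }, at: 4.0, eps: eps)
  print(eps, estimate, abs(estimate - 0.25))
}
```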
---
 test/stdlib/tgmath.swift.gyb | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/test/stdlib/tgmath.swift.gyb b/test/stdlib/tgmath.swift.gyb
index 490a4100e3759..9ef9dedba8151 100644
--- a/test/stdlib/tgmath.swift.gyb
+++ b/test/stdlib/tgmath.swift.gyb
@@ -56,13 +56,14 @@ func expectEqualWithTolerance(_ expected: TestLiteralType, _ actual: T,
 func checkGradient(
   _ f: @differentiable (T, T) -> T,
   _ x: T,
-  _ y: T) where T == T.TangentVector {
-  let eps = T(0.001)
+  _ y: T)
+where T == T.TangentVector {
+  let eps = T(0.1)
   let grad = gradient(at: x, y, in: f)
   let dfdx = (f(x + eps, y) - f(x, y)) / eps
   let dfdy = (f(x, y + eps) - f(x, y)) / eps
-  expectEqualWithTolerance(TestLiteralType(dfdx.rounded(.toNearestOrAwayFromZero)), grad.0)
-  expectEqualWithTolerance(TestLiteralType(dfdy.rounded(.toNearestOrAwayFromZero)), grad.1)
+  expectEqualWithTolerance(TestLiteralType(dfdx), grad.0, ulps: 32)
+  expectEqualWithTolerance(TestLiteralType(dfdy), grad.1, ulps: 32)
 }
 
 %{

From 3dc7ee0996bd887dc8a10f3cfe7aae33dd807256 Mon Sep 17 00:00:00 2001
From: Victor Guerra
Date: Mon, 18 Nov 2019 22:02:56 +0100
Subject: [PATCH 6/6] Decreasing epsilon used to compute derivative by
 definition.

The epsilon goes from 0.1 to 0.01, which requires adjusting the ulps
tolerance to 192.
---
 test/stdlib/tgmath.swift.gyb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/stdlib/tgmath.swift.gyb b/test/stdlib/tgmath.swift.gyb
index 9ef9dedba8151..47cb8e13a6961 100644
--- a/test/stdlib/tgmath.swift.gyb
+++ b/test/stdlib/tgmath.swift.gyb
@@ -58,12 +58,12 @@ func checkGradient(
   _ x: T,
   _ y: T)
 where T == T.TangentVector {
-  let eps = T(0.1)
+  let eps = T(0.01)
   let grad = gradient(at: x, y, in: f)
   let dfdx = (f(x + eps, y) - f(x, y)) / eps
   let dfdy = (f(x, y + eps) - f(x, y)) / eps
-  expectEqualWithTolerance(TestLiteralType(dfdx), grad.0, ulps: 32)
-  expectEqualWithTolerance(TestLiteralType(dfdy), grad.1, ulps: 32)
+  expectEqualWithTolerance(TestLiteralType(dfdx), grad.0, ulps: 192)
+  expectEqualWithTolerance(TestLiteralType(dfdy), grad.1, ulps: 192)
 }
 
 %{