From d09de888f4a8f58bf6f6c8e1fec5aa7317190d9a Mon Sep 17 00:00:00 2001
From: Victor Guerra
Date: Wed, 30 Oct 2019 12:49:03 +0100
Subject: [PATCH 1/6] [AutoDiff] Defines remaining derivatives for tgmath
 functions.

The following math functions are now differentiable:

* `remainder`
* `fmod`
* `ceil`
* `floor`
* `round`
* `trunc`

This PR also uses the `@differentiating` attribute instead of
`@differentiable` for derivative registration.

NOTE: For the time being, this exposes a compiler crash that may or may not
be related to [TF-429](https://bugs.swift.org/browse/TF-429).

Resolves [TF-812](https://bugs.swift.org/browse/TF-812)
---
 stdlib/public/Platform/tgmath.swift.gyb | 69 ++++++++++++++++++++-----
 test/stdlib/tgmath.swift.gyb            | 10 ++++
 2 files changed, 66 insertions(+), 13 deletions(-)

diff --git a/stdlib/public/Platform/tgmath.swift.gyb b/stdlib/public/Platform/tgmath.swift.gyb
index 0d1526865353d..2b22a28824764 100644
--- a/stdlib/public/Platform/tgmath.swift.gyb
+++ b/stdlib/public/Platform/tgmath.swift.gyb
@@ -20,22 +20,11 @@ public func fabs(_ x: T) -> T {
 }
 
 @_transparent
-// SWIFT_ENABLE_TENSORFLOW
-@differentiable(
-  vjp: _vjpSqrt
-  where T : Differentiable & FloatingPoint, T == T.TangentVector
-)
 public func sqrt(_ x: T) -> T {
   return x.squareRoot()
 }
 
 @_transparent
-// SWIFT_ENABLE_TENSORFLOW
-@differentiable(
-  wrt: (x, y, z),
-  vjp: _vjpFma
-  where T : Differentiable & FloatingPoint, T == T.TangentVector
-)
 public func fma(_ x: T, _ y: T, _ z: T) -> T {
   return z.addingProduct(x, y)
 }
@@ -95,22 +84,75 @@ public func frexp(_ x: T) -> (T, Int) {
 
 // SWIFT_ENABLE_TENSORFLOW
 @usableFromInline
+@differentiating(sqrt)
 func _vjpSqrt (
   _ x: T
-) -> (T, (T) -> T) where T == T.TangentVector {
+) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
   let value = x.squareRoot()
   return (value, { v in v / (2 * value) })
 }
 
 @usableFromInline
+@differentiating(fma)
 func _vjpFma (
   _ x: T,
   _ y: T,
   _ z: T
-) -> (T, (T) -> (T, T, T)) where T == T.TangentVector {
+) -> (value: T, pullback: (T) -> (T, T, T)) where T == T.TangentVector {
   return (fma(x, y, z), { v in (v * y, v * x, v) })
 }
+@usableFromInline
+@differentiating(remainder)
+func _vjpRemainder (
+  _ x: T,
+  _ y: T
+) -> (value: T, pullback: (T) -> (T, T)) where T == T.TangentVector {
+  return (remainder(x, y), { v in (v, v) })
+}
+
+@usableFromInline
+@differentiating(fmod)
+func _vjpFmod (
+  _ x: T,
+  _ y: T
+) -> (value: T, pullback: (T) -> (T, T)) where T == T.TangentVector {
+  return (fmod(x, y), { v in (v, v) })
+}
+
+@usableFromInline
+@differentiating(ceil)
+func _vjpCeil (
+  _ x: T
+) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+  return (ceil(x), { v in 0 })
+}
+
+@usableFromInline
+@differentiating(floor)
+func _vjpFloor (
+  _ x: T
+) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+  return (floor(x), { v in 0 })
+}
+
+@usableFromInline
+@differentiating(round)
+func _vjpRound (
+  _ x: T
+) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+  return (round(x), { v in 0 })
+}
+
+@usableFromInline
+@differentiating(trunc)
+func _vjpTrunc (
+  _ x: T
+) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+  return (trunc(x), { v in 0 })
+}
+// SWIFT_ENABLE_TENSORFLOW END
+
 %for T in ['Float','Double']:
 @available(swift, deprecated: 4.2, renamed: "scalbn")
 @_transparent
@@ -233,6 +275,7 @@ func _vjpErf(_ x: ${T}) -> (${T}, (${T}) -> ${T}) {
 func _vjpErfc(_ x: ${T}) -> (${T}, (${T}) -> ${T}) {
   return (erfc(x), { v in v * -${T}(M_2_SQRTPI) * exp(-x * x)
   })
 }
+// SWIFT_ENABLE_TENSORFLOW END
 % if T == 'Float80':
 #endif
 % end

diff --git a/test/stdlib/tgmath.swift.gyb b/test/stdlib/tgmath.swift.gyb
index 65341df93ab14..f2eea08eecc74 100644
--- a/test/stdlib/tgmath.swift.gyb
+++ b/test/stdlib/tgmath.swift.gyb
@@ -273,6 +273,16 @@ MathTests.test("gradient_${T}") {
   expectEqualWithTolerance(5.0, fmaGrad.0, ulps: 16)
   expectEqualWithTolerance(4.0, fmaGrad.1, ulps: 16)
   expectEqualWithTolerance(1.0, fmaGrad.2, ulps: 16)
+  let remainderGrad = gradient(at: 4.0 as ${T}, 5.0 as ${T}, in: { x, y in remainder(x, y) })
+  expectEqualWithTolerance(1.0, remainderGrad.0, ulps: 16)
+  expectEqualWithTolerance(1.0, remainderGrad.1, ulps: 16)
+  let fmodGrad = gradient(at: 4.0 as ${T}, 5.0 as ${T}, in: { x, y in fmod(x, y) })
+  expectEqualWithTolerance(1.0, fmodGrad.0, ulps: 16)
+  expectEqualWithTolerance(1.0, fmodGrad.1, ulps: 16)
+  expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { ceil($0) }), ulps: 16)
+  expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { floor($0) }), ulps: 16)
+  expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { round($0) }), ulps: 16)
+  expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { trunc($0) }), ulps: 16)
 }
 
 %end

From 6a0dc664a6a8a325508b6d3d64c744e77df9e7e1 Mon Sep 17 00:00:00 2001
From: Dan Zheng
Date: Wed, 6 Nov 2019 18:03:44 -0800
Subject: [PATCH 2/6] Use `@differentiating` with `pullback:` label.

Change functions declared with the `@differentiating` attribute to return a
tuple with the `pullback:` label instead of the `differential:` label.

The `pullback:` label indicates that the `@differentiating` function is a
reverse-mode derivative function (VJP), not a forward-mode derivative
function (JVP).

Eventually, when `@differentiable(linear)` functions and transposition are
fully implemented, the `@differentiating` attribute may be changed to
register only differential-returning derivative functions.
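For illustration, here is a rough sketch of the two result shapes, using a
made-up `square` function. Only the attribute and the `pullback:` /
`differential:` result labels come from this change; the function and
derivative names below are illustrative, not part of the patch.

```swift
func square(_ x: Float) -> Float { x * x }

// Reverse-mode derivative (VJP): the second tuple element is a pullback that
// maps a cotangent of the result back to a cotangent of the argument.
@differentiating(square)
func _vjpSquare(_ x: Float) -> (value: Float, pullback: (Float) -> Float) {
  return (x * x, { v in 2 * x * v })
}

// Forward-mode derivative (JVP): the second tuple element is a differential
// that maps a tangent of the argument to a tangent of the result.
@differentiating(square)
func _jvpSquare(_ x: Float) -> (value: Float, differential: (Float) -> Float) {
  return (x * x, { dx in 2 * x * dx })
}
```

With this change, the derivative functions registered in tgmath.swift.gyb all
use the `pullback:` form.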
---
 stdlib/public/Platform/tgmath.swift.gyb | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/stdlib/public/Platform/tgmath.swift.gyb b/stdlib/public/Platform/tgmath.swift.gyb
index 2b22a28824764..02c7cce5fb0c3 100644
--- a/stdlib/public/Platform/tgmath.swift.gyb
+++ b/stdlib/public/Platform/tgmath.swift.gyb
@@ -87,7 +87,7 @@ public func frexp(_ x: T) -> (T, Int) {
 @differentiating(sqrt)
 func _vjpSqrt (
   _ x: T
-) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+) -> (value: T, pullback: (T) -> T) where T == T.TangentVector {
   let value = x.squareRoot()
   return (value, { v in v / (2 * value) })
 }
@@ -124,7 +124,7 @@ func _vjpFmod (
 @differentiating(ceil)
 func _vjpCeil (
   _ x: T
-) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+) -> (value: T, pullback: (T) -> T) where T == T.TangentVector {
   return (ceil(x), { v in 0 })
 }
@@ -132,7 +132,7 @@ func _vjpCeil (
 @differentiating(floor)
 func _vjpFloor (
   _ x: T
-) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+) -> (value: T, pullback: (T) -> T) where T == T.TangentVector {
   return (floor(x), { v in 0 })
 }
@@ -140,7 +140,7 @@ func _vjpFloor (
 @differentiating(round)
 func _vjpRound (
   _ x: T
-) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+) -> (value: T, pullback: (T) -> T) where T == T.TangentVector {
   return (round(x), { v in 0 })
 }
@@ -148,7 +148,7 @@ func _vjpRound (
 @differentiating(trunc)
 func _vjpTrunc (
   _ x: T
-) -> (value: T, differential: (T) -> T) where T == T.TangentVector {
+) -> (value: T, pullback: (T) -> T) where T == T.TangentVector {
   return (trunc(x), { v in 0 })
 }
 // SWIFT_ENABLE_TENSORFLOW END

From ef48b6ab6b341a6e5df84b30d75e7579999d837d Mon Sep 17 00:00:00 2001
From: Victor Guerra
Date: Thu, 7 Nov 2019 16:09:04 +0100
Subject: [PATCH 3/6] Sqrt's VJP uses `sqrt` to compute value

---
 stdlib/public/Platform/tgmath.swift.gyb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stdlib/public/Platform/tgmath.swift.gyb b/stdlib/public/Platform/tgmath.swift.gyb
index 02c7cce5fb0c3..037cff1a84de2 100644
--- a/stdlib/public/Platform/tgmath.swift.gyb
+++ b/stdlib/public/Platform/tgmath.swift.gyb
@@ -88,7 +88,7 @@ public func frexp(_ x: T) -> (T, Int) {
 func _vjpSqrt (
   _ x: T
 ) -> (value: T, pullback: (T) -> T) where T == T.TangentVector {
-  let value = x.squareRoot()
+  let value = sqrt(x)
   return (value, { v in v / (2 * value) })
 }

From a30582e003c9f897d1eb7c299c38f2a320afb93e Mon Sep 17 00:00:00 2001
From: Victor Guerra
Date: Wed, 13 Nov 2019 13:36:46 +0100
Subject: [PATCH 4/6] Fixing derivatives of `remainder` and `fmod` w.r.t. y
 variable.

Also, extend the test strategy to double-check the derivatives.
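For intuition: away from the points where the rounded quotient jumps,
remainder(x, y) = x - y * (x / y).rounded(.toNearestOrEven) and
fmod(x, y) = x - y * (x / y).rounded(.towardZero), so the partial with respect
to x is 1 and the partial with respect to y is minus the rounded quotient.
A small standalone check in plain Swift with Foundation (independent of this
patch) comparing those closed forms against one-sided finite differences:

```swift
import Foundation

let (x, y) = (4.0, 5.0)
let eps = 1e-6

// d/dy remainder(x, y) should match -(x / y).rounded(.toNearestOrEven).
let dRemainderDy = (remainder(x, y + eps) - remainder(x, y)) / eps
print(dRemainderDy, -(x / y).rounded(.toNearestOrEven))  // both ≈ -1

// d/dy fmod(x, y) should match -(x / y).rounded(.towardZero).
let dFmodDy = (fmod(x, y + eps) - fmod(x, y)) / eps
print(dFmodDy, -(x / y).rounded(.towardZero))            // both ≈ 0
```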
---
 stdlib/public/Platform/tgmath.swift.gyb |  4 ++--
 test/stdlib/tgmath.swift.gyb            | 29 ++++++++++++++++++++-----
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/stdlib/public/Platform/tgmath.swift.gyb b/stdlib/public/Platform/tgmath.swift.gyb
index 037cff1a84de2..f2e4c1eace2a6 100644
--- a/stdlib/public/Platform/tgmath.swift.gyb
+++ b/stdlib/public/Platform/tgmath.swift.gyb
@@ -108,7 +108,7 @@ func _vjpRemainder (
   _ x: T,
   _ y: T
 ) -> (value: T, pullback: (T) -> (T, T)) where T == T.TangentVector {
-  return (remainder(x, y), { v in (v, v) })
+  return (remainder(x, y), { v in (v, -v * ((x / y).rounded(.toNearestOrEven))) })
 }
 
 @usableFromInline
@@ -117,7 +117,7 @@ func _vjpFmod (
   _ x: T,
   _ y: T
 ) -> (value: T, pullback: (T) -> (T, T)) where T == T.TangentVector {
-  return (fmod(x, y), { v in (v, v) })
+  return (fmod(x, y), { v in (v, -v * ((x / y).rounded(.towardZero))) })
 }
 
 @usableFromInline

diff --git a/test/stdlib/tgmath.swift.gyb b/test/stdlib/tgmath.swift.gyb
index f2eea08eecc74..490a4100e3759 100644
--- a/test/stdlib/tgmath.swift.gyb
+++ b/test/stdlib/tgmath.swift.gyb
@@ -53,6 +53,18 @@ func expectEqualWithTolerance(_ expected: TestLiteralType, _ actual: T,
     file: file, line: line)
 }
 
+func checkGradient(
+  _ f: @differentiable (T, T) -> T,
+  _ x: T,
+  _ y: T) where T == T.TangentVector {
+  let eps = T(0.001)
+  let grad = gradient(at: x, y, in: f)
+  let dfdx = (f(x + eps, y) - f(x, y)) / eps
+  let dfdy = (f(x, y + eps) - f(x, y)) / eps
+  expectEqualWithTolerance(TestLiteralType(dfdx.rounded(.toNearestOrAwayFromZero)), grad.0)
+  expectEqualWithTolerance(TestLiteralType(dfdy.rounded(.toNearestOrAwayFromZero)), grad.1)
+}
+
 %{
 unary = [
   'acos', 'asin', 'atan',
@@ -273,16 +285,21 @@ MathTests.test("gradient_${T}") {
   expectEqualWithTolerance(5.0, fmaGrad.0, ulps: 16)
   expectEqualWithTolerance(4.0, fmaGrad.1, ulps: 16)
   expectEqualWithTolerance(1.0, fmaGrad.2, ulps: 16)
-  let remainderGrad = gradient(at: 4.0 as ${T}, 5.0 as ${T}, in: { x, y in remainder(x, y) })
-  expectEqualWithTolerance(1.0, remainderGrad.0, ulps: 16)
-  expectEqualWithTolerance(1.0, remainderGrad.1, ulps: 16)
-  let fmodGrad = gradient(at: 4.0 as ${T}, 5.0 as ${T}, in: { x, y in fmod(x, y) })
-  expectEqualWithTolerance(1.0, fmodGrad.0, ulps: 16)
-  expectEqualWithTolerance(1.0, fmodGrad.1, ulps: 16)
   expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { ceil($0) }), ulps: 16)
   expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { floor($0) }), ulps: 16)
   expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { round($0) }), ulps: 16)
   expectEqualWithTolerance(0.0, gradient(at: 2.0 as ${T}, in: { trunc($0) }), ulps: 16)
+  for a in -10...10 {
+    let x = ${T}(a)
+    for b in -10...10 {
+      let y = ${T}(b)
+      guard b != 0 && remainder(x, y).sign == remainder(x + ${T}(0.001), y).sign &&
+        remainder(x, y).sign == remainder(x, y + ${T}(0.001)).sign
+      else { continue }
+      checkGradient({ remainder($0, $1) }, x, y)
+      checkGradient({ fmod($0, $1) }, x, y)
+    }
+  }
 }
 
 %end

From 4ca8cc5010b947197408cb9588dd10c9019fc7ca Mon Sep 17 00:00:00 2001
From: Victor Guerra
Date: Thu, 14 Nov 2019 23:50:44 +0100
Subject: [PATCH 5/6] Avoid rounding of expected derivative values in tests.

Also address formatting remarks.
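For context on why the raw finite-difference value is compared against the
analytic gradient with a ulps tolerance rather than rounded first: a forward
difference carries truncation error on the order of the step size, so it is
close to, but not exactly equal to, the true derivative. A self-contained
sketch in plain Swift (not part of the test file; `forwardDiff` is a made-up
helper):

```swift
// Exact derivative of sqrt at x = 4 is 0.25; the forward-difference estimate
// is slightly off, and the gap shrinks roughly linearly as eps shrinks.
func forwardDiff(_ f: (Double) -> Double, at x: Double, eps: Double) -> Double {
  return (f(x + eps) - f(x)) / eps
}

for eps in [0.1, 0.01, 0.001] {
  let estimate = forwardDiff({ $0.squareRoot() }, at: 4.0, eps: eps)
  print(eps, estimate, abs(estimate - 0.25))
}
```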
---
 test/stdlib/tgmath.swift.gyb | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/test/stdlib/tgmath.swift.gyb b/test/stdlib/tgmath.swift.gyb
index 490a4100e3759..9ef9dedba8151 100644
--- a/test/stdlib/tgmath.swift.gyb
+++ b/test/stdlib/tgmath.swift.gyb
@@ -56,13 +56,14 @@ func expectEqualWithTolerance(_ expected: TestLiteralType, _ actual: T,
 func checkGradient(
   _ f: @differentiable (T, T) -> T,
   _ x: T,
-  _ y: T) where T == T.TangentVector {
-  let eps = T(0.001)
+  _ y: T)
+where T == T.TangentVector {
+  let eps = T(0.1)
   let grad = gradient(at: x, y, in: f)
   let dfdx = (f(x + eps, y) - f(x, y)) / eps
   let dfdy = (f(x, y + eps) - f(x, y)) / eps
-  expectEqualWithTolerance(TestLiteralType(dfdx.rounded(.toNearestOrAwayFromZero)), grad.0)
-  expectEqualWithTolerance(TestLiteralType(dfdy.rounded(.toNearestOrAwayFromZero)), grad.1)
+  expectEqualWithTolerance(TestLiteralType(dfdx), grad.0, ulps: 32)
+  expectEqualWithTolerance(TestLiteralType(dfdy), grad.1, ulps: 32)
 }
 
 %{

From 3dc7ee0996bd887dc8a10f3cfe7aae33dd807256 Mon Sep 17 00:00:00 2001
From: Victor Guerra
Date: Mon, 18 Nov 2019 22:02:56 +0100
Subject: [PATCH 6/6] Decreasing epsilon used to compute derivative by
 definition.

The epsilon goes from 0.1 to 0.01, which requires adjusting the ulps
tolerance to 192.
---
 test/stdlib/tgmath.swift.gyb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/stdlib/tgmath.swift.gyb b/test/stdlib/tgmath.swift.gyb
index 9ef9dedba8151..47cb8e13a6961 100644
--- a/test/stdlib/tgmath.swift.gyb
+++ b/test/stdlib/tgmath.swift.gyb
@@ -58,12 +58,12 @@ func checkGradient(
   _ x: T,
   _ y: T)
 where T == T.TangentVector {
-  let eps = T(0.1)
+  let eps = T(0.01)
   let grad = gradient(at: x, y, in: f)
   let dfdx = (f(x + eps, y) - f(x, y)) / eps
   let dfdy = (f(x, y + eps) - f(x, y)) / eps
-  expectEqualWithTolerance(TestLiteralType(dfdx), grad.0, ulps: 32)
-  expectEqualWithTolerance(TestLiteralType(dfdy), grad.1, ulps: 32)
+  expectEqualWithTolerance(TestLiteralType(dfdx), grad.0, ulps: 192)
+  expectEqualWithTolerance(TestLiteralType(dfdy), grad.1, ulps: 192)
 }
 
 %{