In [1]:
import _Differentiation

// https://github.com/PassiveLogic/differentiable-swift-examples/blob/main/Sources/BasicGradientDescent/main.swift
// In this example, we'll set up a very simple perceptron neural network and try to use gradient
// descent to have it mimic the functionality of an AND gate.

/// A single-neuron model with two scalar inputs.
///
/// Both weights start at random values in `-1..<1` and the bias starts at zero.
struct Perceptron: Differentiable {
    /// Multiplier applied to the first input.
    var weight1: Float = .random(in: -1..<1)
    /// Multiplier applied to the second input.
    var weight2: Float = .random(in: -1..<1)
    /// Constant offset added to the weighted inputs.
    var bias: Float = 0.0

    /// Evaluates the perceptron for the input pair `(x1, x2)`.
    ///
    /// - Parameters:
    ///   - x1: The first input value.
    ///   - x2: The second input value.
    /// - Returns: The weighted sum of the inputs plus the bias when that sum is
    ///   non-negative; otherwise that sum scaled by `0.1`.
    @differentiable(reverse)
    func callAsFunction(_ x1: Float, _ x2: Float) -> Float {
        // Pre-activation: each input's weighted contribution, plus the bias.
        let weightedSum = (weight1 * x1) + (weight2 * x2) + bias

        // Nonlinear activation: negative sums are scaled down by a factor of 0.1
        // instead of being passed through unchanged.
        guard weightedSum >= 0.0 else {
            return 0.1 * weightedSum
        }
        return weightedSum
    }
}

// Truth table for an AND gate: each row pairs two inputs (`x1`, `x2`) with the
// expected output (`y`). Only the row where both inputs are 1 expects an output of 1.

let andGateData: [(x1: Float, x2: Float, y: Float)] = [
    (x1: 0.0, x2: 0.0, y: 0.0),  // 0 AND 0 -> 0
    (x1: 0.0, x2: 1.0, y: 0.0),  // 0 AND 1 -> 0
    (x1: 1.0, x2: 0.0, y: 0.0),  // 1 AND 0 -> 0
    (x1: 1.0, x2: 1.0, y: 1.0),  // 1 AND 1 -> 1
]

// A loss function provides a measure of how far off we are from our target behavior.

/// Computes the total error of `model` over every row of `andGateData`.
///
/// - Parameter model: The perceptron whose predictions are being scored.
/// - Returns: The sum, over all rows, of half the squared difference between the
///   expected output `y` and the model's prediction for `(x1, x2)`.
@differentiable(reverse)
func loss(model: Perceptron) -> Float {
    var total: Float = 0
    for sample in andGateData {
        // Difference between the expected output and what the model produced.
        let residual = sample.y - model(sample.x1, sample.x2)
        total = total + residual * residual / 2
    }
    return total
}

// Finally, we initialize the model with random weights and a zero bias.

var model = Perceptron()

// Training: on every step we evaluate the loss and obtain its pullback, print the
// current loss, and then ask the pullback for a tangent vector scaled by the negative
// learning rate. Moving the model parameters by that vector should reduce the loss, so
// over the course of training the printed loss values decrease as the model learns to
// replicate an AND gate.

let trainingSteps = 100
let learningRate: Float = 0.1

for _ in 0..<trainingSteps {
    let (currentLoss, pullback) = valueWithPullback(at: model, of: loss)
    print("Loss: \(currentLoss)")
    // Seeding the pullback with a negative value yields a step that points downhill.
    model.move(by: pullback(-learningRate))
}

// Let's try out our trained model on some test values. For an AND gate the target
// output is 0 at (1.0, 0.0) and 1 at (1.0, 1.0).

let value1 = model(1.0, 0.0)
let value2 = model(1.0, 1.0)

print("Trained model results:")
print("Value at (1.0, 0.0): \(value1)")
print("Value at (1.0, 1.0): \(value2)")

Loss: 0.6558181
Loss: 0.65133893
Loss: 0.64720356
Loss: 0.64334804
Loss: 0.63972056
Loss: 0.6362798
Loss: 0.632992
Loss: 0.6298307
Loss: 0.6267738
Loss: 0.623804
Loss: 0.6209073
Loss: 0.6180722
Loss: 0.61529
Loss: 0.61255276
Loss: 0.60985494
Loss: 0.60719156
Loss: 0.604559
Loss: 0.6019538
Loss: 0.5993736
Loss: 0.59681636
Loss: 0.5942802
Loss: 0.59176403
Loss: 0.58926666
Loss: 0.5867871
Loss: 0.58432454
Loss: 0.58187866
Loss: 0.5794486
Loss: 0.57703424
Loss: 0.5746349
Loss: 0.57225055
Loss: 0.5698807
Loss: 0.56752545
Loss: 0.5651843
Loss: 0.5628571
Loss: 0.5605438
Loss: 0.5582886
Loss: 0.5562146
Loss: 0.55427104
Loss: 0.5524215
Loss: 0.55064446
Loss: 0.54892695
Loss: 0.54726034
Loss: 0.545639
Loss: 0.54405844
Loss: 0.54251534
Loss: 0.54100674
Loss: 0.53953016
Loss: 0.53808343
Loss: 0.5366646
Loss: 0.5352719
Loss: 0.53390336
Loss: 0.5325578
Loss: 0.53123355
Loss: 0.52992946
Loss: 0.5286443
Loss: 0.527377
Loss: 0.5261263
Loss: 0.5248916
Loss: 0.5236716
Loss: 0.5224658
Loss: 0.5212735
Loss