In [None]:
%install '.package(path: "$cwd/FastaiNotebooks")' FastaiNotebooks

Installing packages:
	.package(path: "/home/ubuntu/notebooks/swift/FastaiNotebooks")
		FastaiNotebooks
With SwiftPM flags: []
Working in: /tmp/tmpdu64dpmu
Fetching https://github.com/mxcl/Path.swift
Fetching https://github.com/JustHTTP/Just
Completed resolution in 1.27s
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.2
Cloning https://github.com/JustHTTP/Just
Resolving https://github.com/JustHTTP/Just at 0.7.1
Compile Swift Module 'Just' (1 sources)
Compile Swift Module 'Path' (9 sources)
Compile Swift Module 'FastaiNotebooks' (3 sources)
Compile Swift Module 'jupyterInstalledPackages' (1 sources)
Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so
Initializing Swift...
Loading library...
Installation complete!


In [None]:
import FastaiNotebooks

In [None]:
// export
import Path
import TensorFlow

### Data

In [None]:
// Load the dataset from the local fastai data directory and pull out the four splits.
// `xTrain` and `xValid` are reassigned with their normalized versions in a later cell.
let mnist = MnistDataset(path: Path.home/".fastai"/"data"/"mnist_tst")
var xTrain = mnist.xTrain
var yTrain = mnist.yTrain
var xValid = mnist.xValid
var yValid = mnist.yValid

In [None]:
// Mean and standard deviation of the training inputs; the same two values are
// later used to normalize both the training and the validation inputs.
let trainMean = xTrain.mean()
let trainStd  = xTrain.std()

In [None]:
// Normalize both splits with the *training* statistics so they share one scale.
xTrain = normalize(xTrain, mean: trainMean, std: trainStd)
xValid = normalize(xValid, mean: trainMean, std: trainStd)

In [None]:
// n: first dimension of xTrain (number of rows), m: second dimension (values per row).
let (n,m) = (Int(xTrain.shape[0]),Int(xTrain.shape[1]))
// c: largest label + 1, i.e. the class count when labels start at 0.
// Note that this is computed from a tensor, not an Int literal.
let c = yTrain.max()+1
print(n,m,c)

60000 784 10


Those can't be used to define a model, though, because they're not all plain `Int`s (`c` is computed from a tensor), so we hard-code the values below.

In [None]:
// Hard-coded Int versions of the sizes printed above, usable as layer sizes.
let (n,m) = (60000,784)
let c = 10
// Width of the hidden layer in the model defined below.
let nHid = 50

In [None]:
/// A two-layer fully connected network: `m` inputs -> `nHid` ReLU units -> `c` outputs.
struct MyModel: Layer {
    /// Hidden layer with a ReLU activation.
    var layer1 = Dense<Float>(inputSize: m, outputSize: nHid, activation: relu)
    /// Output layer producing one value per class.
    var layer2 = Dense<Float>(inputSize: nHid, outputSize: c)

    /// Feeds `input` through `layer1` and then `layer2`, both in `context`.
    @differentiable
    func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
        let hidden = layer1.applied(to: input, in: context)
        return layer2.applied(to: hidden, in: context)
    }
}

In [None]:
// `var` because the optimizer later updates the model's variables in place.
var model = MyModel()

In [None]:
// Forward pass of the (still untrained) model over the whole training set.
let pred = model.applied(to: xTrain)

### Cross entropy loss

Convert this part of 03 to get to...

In [None]:
// Cross-entropy of the untrained model's predictions against the training labels.
let loss = softmaxCrossEntropy(logits: pred, labels: yTrain)

## Basic training loop

Basically the training loop repeats over the following steps:
- get the output of the model on a batch of inputs
- compare the output to the labels we have and compute a loss
- calculate the gradients of the loss with respect to every parameter of the model
- update said parameters with those gradients to make them a little bit better

In [None]:
// export
/// Returns the fraction of rows in `output` whose arg-max along axis 1 equals
/// the corresponding entry of `target`.
public func accuracy(_ output: Tensor<Float>, _ target: Tensor<Int32>) -> Tensor<Float> {
    let predicted = output.argmax(squeezingAxis: 1)
    let isCorrect = predicted .== target
    // Convert the matches to floats so that their mean is the hit rate.
    return Tensor<Float>(isCorrect).mean()
}

In [None]:
// Accuracy of the untrained model on the whole training set.
print(accuracy(pred, yTrain))

0.061683334


In [None]:
// Run the model on the first mini-batch as a quick sanity check.
let bs:Int32=64                         // batch size (rows per mini-batch)
let xb = xTrain[0..<bs]          // the first `bs` rows of the training inputs
let preds = model.applied(to: xb) // predictions for that mini-batch
print(preds[0], preds.shape) // first row of predictions, then the overall shape

[1.341708, 1.3575298, 0.029310167, -0.4693321, -0.59524554, 0.61489546, -2.1155276, 2.4923306, 1.4987891, 0.0150280595] TensorShape(dimensions: [64, 10])


In [None]:
// Labels matching the first mini-batch, and the loss of its predictions.
let yb = yTrain[0..<bs]
let loss = softmaxCrossEntropy(logits: preds, labels: yb)

In [None]:
// Accuracy of the untrained model on the first mini-batch.
print(accuracy(preds, yb))

0.0625


In [None]:
// Hyper-parameters shared by the training loops below.
let lr:Float = 0.5   // learning rate
let epochs = 1      // how many epochs to train for

We can't write the whole training loop by hand, since working with the gradients manually is blocked by TF-417, so we use an optimizer for the parameter update.

In [None]:
// SGD optimizer for `MyModel`, configured only with the learning rate `lr`.
let optimizer = SGD<MyModel, Float>(learningRate: lr)

In [None]:
// Context marking the learning phase as training; passed to the forward passes
// inside the training loops.
let trainingContext = Context(learningPhase: .training)

In [None]:
// Manual mini-batch loop: slice the tensors by hand and take one optimizer
// step per batch.
for _ in 1...epochs {
    // ceil(n / bs): the `+ 1` makes sure the final batch is visited too.
    let batchCount = (n - 1) / Int(bs) + 1
    for i in 0..<batchCount {
        let startIdx = Int32(i) * bs
        // Clamp so that the last slice stops at the end of the data instead of
        // running past it when `n` is not a multiple of `bs`.
        let endIdx = min(startIdx + bs, Int32(n))
        let xb = xTrain[startIdx..<endIdx]
        let yb = yTrain[startIdx..<endIdx]
        // Only the gradients are needed here; the loss value is discarded.
        let (_, grads) = model.valueWithGradient { model -> Tensor<Float> in
            let preds = model.applied(to: xb, in: trainingContext)
            return softmaxCrossEntropy(logits: preds, labels: yb)
        }
        optimizer.update(&model.allDifferentiableVariables, along: grads)
    }
}

## Dataset and DataLoader

We can easily create a Swift `Dataset` from our tensors. It will automatically batch things for us.

In [None]:
/// An (inputs, labels) pair used as the element type of a `Dataset`.
struct Batch: TensorGroup{
    /// Model inputs.
    let x: Tensor<Float>
    /// Integer labels matching `x`.
    let y: Tensor<Int32>
}

In [None]:
// Wrap the training tensors in a Dataset and group its elements into batches of `bs`.
let train_ds:Dataset<Batch> = Dataset(elements:Batch(x:xTrain, y:yTrain)).batched(Int64(bs))

In [None]:
// One optimizer step per batch, with the batching delegated to `train_ds`.
for _ in 1...epochs {
    for batch in train_ds {
        let (_, grads) = model.valueWithGradient { net -> Tensor<Float> in
            let logits = net.applied(to: batch.x, in: trainingContext)
            return softmaxCrossEntropy(logits: logits, labels: batch.y)
        }
        optimizer.update(&model.allDifferentiableVariables, along: grads)
    }
}

This `Dataset` can also do the shuffle for us:

In [None]:
// Same step as the plain dataset loop, but iterating over `train_ds.shuffled()`.
for _ in 1...epochs {
    let shuffledBatches = train_ds.shuffled()
    for batch in shuffledBatches {
        let (_, grads) = model.valueWithGradient { net -> Tensor<Float> in
            let logits = net.applied(to: batch.x, in: trainingContext)
            return softmaxCrossEntropy(logits: logits, labels: batch.y)
        }
        optimizer.update(&model.allDifferentiableVariables, along: grads)
    }
}

### Training loop

In [None]:
// Example loss function.
// TODO: This should be moved into the TensorFlow library/APIs.
/// Mean of the loss returned by `Raw.softmaxCrossEntropyWithLogits` for
/// `features` against `labels`.
///
/// Differentiation uses the hand-written `_vjpSoftmaxCrossEntropy`.
@differentiable(vjp: _vjpSoftmaxCrossEntropy)
func softmaxCrossEntropy<Scalar: TensorFlowFloatingPoint>(
    features: Tensor<Scalar>, labels: Tensor<Scalar>
) -> Tensor<Scalar> {
    let rawLoss = Raw.softmaxCrossEntropyWithLogits(features: features, labels: labels).loss
    return rawLoss.mean()
}

/// Custom derivative for `softmaxCrossEntropy(features:labels:)`.
///
/// Returns the mean loss together with a pullback. The pullback scales the
/// gradient produced by the raw op for `features`, and returns a scalar zero
/// as the gradient for `labels`.
@usableFromInline
func _vjpSoftmaxCrossEntropy<Scalar: TensorFlowFloatingPoint>(
    features: Tensor<Scalar>, labels: Tensor<Scalar>
) -> (Tensor<Scalar>, (Tensor<Scalar>) -> (Tensor<Scalar>, Tensor<Scalar>)) {
    let (rawLoss, featuresGrad) = Raw.softmaxCrossEntropyWithLogits(
        features: features, labels: labels)
    // The forward value is a mean, so the incoming gradient is divided by the
    // size of the first dimension of `features`.
    let batchSize = Tensor<Scalar>(features.shapeTensor[0])
    let pullback: (Tensor<Scalar>) -> (Tensor<Scalar>, Tensor<Scalar>) = { upstream in
        let scaled = upstream / batchSize
        return (scaled * featuresGrad, Tensor<Scalar>(0))
    }
    return (rawLoss.mean(), pullback)
}

In [None]:
// Example type for use with `Dataset`.
// TODO: The usage of this should be re-evaluated.
/// A (data, labels) pair in which both tensors hold floating-point scalars.
public struct Example<
    DataScalar: TensorFlowFloatingPoint,
    LabelScalar: TensorFlowFloatingPoint
>: TensorGroup {
    /// The input tensor.
    public var data: Tensor<DataScalar>
    /// The label tensor matching `data`.
    public var labels: Tensor<LabelScalar>
}

In [None]:
/// A training loop.
///
/// Makes one pass over `dataset`. For every batch it evaluates `loss` on the
/// model's predictions, prints the resulting value, and asks `optimizer` to
/// update the variables found at `variablesKeyPath` along the gradient.
///
/// - Parameters:
///   - model: The model to train; updated in place.
///   - variablesKeyPath: Key path from the model to the variables the optimizer updates.
///   - dataset: The batches to iterate over, as `Example` values.
///   - optimizer: The optimizer that applies each update.
///   - loss: Differentiable function called as `loss(predictions, labels)`.
public func train<M, O: Optimizer, S>(
    _ model: inout M,
    at variablesKeyPath: WritableKeyPath<M, M.AllDifferentiableVariables>,
    on dataset: Dataset<Example<S, S>>,
    using optimizer: inout O,
    loss: @escaping @differentiable (Tensor<S>, Tensor<S>) -> Tensor<S>
) where O.Model == M, O.Scalar == S,
        M.Input == Tensor<S>, M.Output == Tensor<S>
{
    let context = Context(learningPhase: .training)
    for batch in dataset {
        let (x, y) = (batch.data, batch.labels)
        // The result is named `batchLoss`, not `loss`, so that it cannot clash
        // with the `loss` function parameter called inside the closure below.
        let (batchLoss, (𝛁model, _)) = model.valueWithGradient(at: y) { (model, y) -> Tensor<S> in
            let preds = model.applied(to: x, in: context)
            return loss(preds, y)
        }
        print(batchLoss)
        optimizer.update(&model[keyPath: variablesKeyPath], along: 𝛁model)
    }
}

In [None]:
// Same dataset construction as before, but with `Example` as the element type.
// NOTE(review): `Example` constrains its label scalar to TensorFlowFloatingPoint, while
// `yTrain` is used as a Tensor<Int32> elsewhere in this notebook - the labels presumably
// need converting (e.g. to a floating-point one-hot tensor) before this compiles. TODO confirm.
// NOTE(review): `Dataset<Example>` leaves Example's generic arguments unspecified - verify
// that the compiler accepts this.
let train_ds:Dataset<Example> = Dataset(elements:Example(data:xTrain, labels:yTrain)).batched(Int64(bs))

### Export

In [None]:
// Export the cells marked `// export` from this notebook to a script.
// The file name previously read "03_minibtach_training.ipynb" (transposed letters).
notebookToScript(fname: (Path.cwd / "03_minibatch_training.ipynb").string)