In [0]:
%install '.package(path: "$cwd/FastaiNotebook_06_cuda")' FastaiNotebook_06_cuda

Installing packages:
	.package(path: "/content/FastaiNotebook_06_cuda")
		FastaiNotebook_06_cuda
With SwiftPM flags: []
Working in: /tmp/tmp3sf137q5/swift-install
Fetching https://github.com/mxcl/Path.swift
Fetching https://github.com/saeta/Just
Fetching https://github.com/latenitesoft/NotebookExport
Completed resolution in 7.16s
Cloning https://github.com/saeta/Just
Resolving https://github.com/saeta/Just at 0.7.3
Cloning https://github.com/latenitesoft/NotebookExport
Resolving https://github.com/latenitesoft/NotebookExport at 0.6.0
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.3
[1/11] Compiling Just Just.swift
[2/11] Compiling Path Path+Attributes.swift
[3/11] Compiling Path Path+Codable.swift
[4/11] Compiling Path Path+CommonDirectories.swift
[5/11] Compiling Path Path+FileManager.swift
[6/11] Compiling Path Path+StringConvertibles.swift
[7/11] Compiling Path Path+ls.swift
[8/11] Compiling Path Path->Bool.swift
[9/11] Compiling Path

In [0]:
import FastaiNotebook_06_cuda
%include "EnableIPythonDisplay.swift"
// Ask the IPython display shell to use matplotlib's "inline" backend.
IPythonDisplay.shell.enable_matplotlib("inline")

('inline', 'module://ipykernel.pylab.backend_inline')


In [0]:
//export
import Path
import TensorFlow
import Python



Let's start by building our own batchnorm layer from scratch. Eventually we want something like this to work:


In [0]:
/// Sketch of the batchnorm layer we would like to be able to write: a class
/// that keeps its running statistics as ordinary mutable properties.
class AlmostBatchNorm<Scalar: TensorFlowFloatingPoint> { // : Layer
    // Configuration hyperparameters
    let momentum: Scalar
    let epsilon: Scalar
    // Running statistics
    var runningMean: Tensor<Scalar>
    var runningVariance: Tensor<Scalar>
    // Trainable parameters
    var scale: Tensor<Scalar>
    var offset: Tensor<Scalar>

    /// Creates the layer with `scale` set to ones and `offset` set to zeros,
    /// both of shape `[featureCount]`.
    init(featureCount: Int, momentum: Scalar = 0.9, epsilon: Scalar = 1e-5) {
        self.momentum = momentum
        self.epsilon = epsilon
        scale = Tensor(ones: [featureCount])
        offset = Tensor(zeros: [featureCount])
        // The running statistics start out as scalar tensors (0 and 1).
        runningMean = Tensor(0)
        runningVariance = Tensor(1)
    }

    /// Normalizes `input` with batch statistics while training (updating the
    /// running statistics as a side effect) and with the running statistics
    /// during inference.
    func call(_ input: Tensor<Scalar>) -> Tensor<Scalar> {
        let mean: Tensor<Scalar>
        let variance: Tensor<Scalar>
        switch Context.local.learningPhase {
        case .training:
            // Statistics are reduced over axes 0, 1 and 2
            // (presumably batch, height and width -- confirm against the data layout).
            mean = input.mean(alongAxes: [0, 1, 2])
            variance = input.variance(alongAxes: [0, 1, 2])
            // Move each running statistic towards the batch statistic by a
            // fraction of (1 - momentum).
            let updateRate = 1 - momentum
            runningMean += (mean - runningMean) * updateRate
            runningVariance += (variance - runningVariance) * updateRate
        case .inference:
            mean = runningMean
            variance = runningVariance
        }
        let centered = input - mean
        let normalizer = rsqrt(variance + epsilon) * scale
        return centered * normalizer + offset
    }
}

But automatic differentiation currently has some limitations (no support for classes or control flow) that make this impossible for now, so we'll need a few workarounds. A `Reference` will let us update the running statistics without making the layer a class or declaring the applied method `mutating`:

In [0]:
//export
/// A minimal box with reference semantics around a value of type `T`.
///
/// Every copy of a `Reference` shares the same storage, so `value` can be
/// changed through a `let` binding.
public class Reference<T> {
    /// The wrapped value.
    public var value: T

    /// Wraps `value` in a new reference.
    public init(_ value: T) {
        self.value = value
    }
}

The following snippet lets us differentiate a layer's `forward` method (the one that `call` invokes for an `FALayer`), provided it is composed of a training implementation and an inference implementation that are each differentiable:

In [0]:
//export
/// An `FALayer` whose forward pass is split into two separately
/// differentiable implementations, one per learning phase.
public protocol LearningPhaseDependent: FALayer {
    associatedtype Input
    associatedtype Output
    /// The forward pass used while the learning phase is `.training`.
    @differentiable func forwardTraining (_ input: Input) -> Output
    /// The forward pass used while the learning phase is `.inference`.
    @differentiable func forwardInference(_ input: Input) -> Output
}

extension LearningPhaseDependent {
    /// Runs `forwardTraining` or `forwardInference`, depending on the current
    /// value of `Context.local.learningPhase`.
    public func forward(_ input: Input) -> Output {
        let phase = Context.local.learningPhase
        switch phase {
        case .training:
            return forwardTraining(input)
        case .inference:
            return forwardInference(input)
        }
    }

    /// Custom derivative registered for `forward`: it picks the branch for the
    /// current learning phase and returns that branch's value and pullback.
    @differentiating(forward)
    func gradForward(_ input: Input) ->
        (value: Output, pullback: (Self.Output.TangentVector) ->
            (Self.TangentVector, Self.Input.TangentVector)) {
        let phase = Context.local.learningPhase
        switch phase {
        case .training:
            return valueWithPullback(at: input) { layer, x in layer.forwardTraining(x) }
        case .inference:
            return valueWithPullback(at: input) { layer, x in layer.forwardInference(x) }
        }
    }
}

Now we can implement a BatchNorm that we can use in our models:

In [0]:
//export
/// An `FALayer` from `TF` to `TF` that can be constructed from a feature
/// count and an epsilon, so generic containers can build any conforming
/// normalization layer the same way.
public protocol Norm: FALayer where Input == TF, Output == TF {
    /// Creates the layer from a feature count and an epsilon value.
    init(_ featureCount: Int, epsilon: Float)
}

/// Batch normalization implemented as a struct: the running statistics live in
/// `Reference` boxes so the non-mutating forward methods can still update them.
public struct FABatchNorm: LearningPhaseDependent, Norm {
    // Configuration hyperparameters
    @noDerivative var momentum: Float
    @noDerivative var epsilon: Float
    // Running statistics
    @noDerivative let runningMean: Reference<TF>
    @noDerivative let runningVariance: Reference<TF>
    // Trainable parameters
    public var scale: TF
    public var offset: TF

    /// Creates the layer with `scale` set to ones and `offset` set to zeros,
    /// both of shape `[featureCount]`.
    public init(_ featureCount: Int, momentum: Float, epsilon: Float = 1e-5) {
        scale = Tensor(ones: [featureCount])
        offset = Tensor(zeros: [featureCount])
        // The running statistics start out as scalar tensors (0 and 1).
        runningMean = Reference(Tensor(0))
        runningVariance = Reference(Tensor(1))
        self.momentum = momentum
        self.epsilon = epsilon
    }

    /// Creates the layer with a momentum of 0.9; this is the initializer
    /// required by `Norm`.
    public init(_ featureCount: Int, epsilon: Float = 1e-5) {
        self.init(featureCount, momentum: 0.9, epsilon: epsilon)
    }

    /// Normalizes `input` with the statistics of the current batch and, as a
    /// side effect, moves the running statistics towards them.
    @differentiable
    public func forwardTraining(_ input: TF) -> TF {
        // Statistics are reduced over axes 0, 1 and 2
        // (presumably batch, height and width -- confirm against the data layout).
        let batchMean = input.mean(alongAxes: [0, 1, 2])
        let batchVariance = input.variance(alongAxes: [0, 1, 2])
        // Each running statistic moves towards the batch statistic by a
        // fraction of (1 - momentum).
        runningMean.value += (batchMean - runningMean.value) * (1 - momentum)
        runningVariance.value += (batchVariance - runningVariance.value) * (1 - momentum)
        let centered = input - batchMean
        let normalizer = rsqrt(batchVariance + epsilon) * scale
        return centered * normalizer + offset
    }

    /// Normalizes `input` with the stored running statistics, leaving them
    /// unchanged.
    @differentiable
    public func forwardInference(_ input: TF) -> TF {
        let mean = runningMean.value
        let variance = runningVariance.value
        let centered = input - mean
        let normalizer = rsqrt(variance + epsilon) * scale
        return centered * normalizer + offset
    }
}

: ignored

Here is a generic `ConvNorm` layer that combines a conv2d layer with a norm layer (batchnorm, running batchnorm, etc.).

In [0]:
//export
/// A bias-free convolution with a ReLU activation, followed by a
/// normalization layer of type `NormType`.
public struct ConvNorm<NormType: Norm & FALayer>: FALayer
    where NormType.AllDifferentiableVariables == NormType.TangentVector {
    /// The convolution applied first.
    public var conv: FANoBiasConv2D<Float>
    /// The normalization layer applied to the convolution's output.
    public var norm: NormType

    /// Builds the convolution from `cIn` to `cOut` with the given kernel size
    /// and stride, and a norm layer for `cOut` features with epsilon 1e-5.
    public init(_ cIn: Int, _ cOut: Int, ks: Int = 3, stride: Int = 2){
        conv = FANoBiasConv2D(cIn, cOut, ks: ks, stride: stride, activation: relu)
        norm = NormType(cOut, epsilon: 1e-5)
    }

    /// Applies the convolution and then the norm layer to `input`.
    @differentiable
    public func forward(_ input: Tensor<Float>) -> Tensor<Float> {
        let convolved = conv(input)
        return norm(convolved)
    }
}

: ignored

In [0]:
//export
/// A CNN made of a stack of `ConvNorm` layers, a global average pool and a
/// final dense layer.
public struct CnnModelNormed<NormType: Norm & FALayer>: FALayer
    where NormType.AllDifferentiableVariables == NormType.TangentVector {
    /// The convolution + norm layers, applied in order.
    public var convs: [ConvNorm<NormType>]
    /// The global average pool applied after the convolutions.
    public var pool = FAGlobalAvgPool2D<Float>()
    /// The dense layer that produces the `nOut` outputs.
    public var linear: FADense<Float>

    /// Creates the model.
    ///
    /// - Parameters:
    ///   - channelIn: Number of input channels of the first convolution.
    ///   - nOut: Number of outputs of the final dense layer.
    ///   - filters: Output channel count of each `ConvNorm` layer, in order.
    ///     Must not be empty.
    public init(channelIn: Int, nOut: Int, filters: [Int]){
        // Fail with an explicit message instead of an opaque force-unwrap crash.
        guard let lastFilterCount = filters.last else {
            preconditionFailure("CnnModelNormed requires at least one entry in `filters`.")
        }
        // Layer i maps from the previous layer's channel count (or `channelIn`
        // for the first layer) to `filters[i]`.
        let inputChannels = [channelIn] + filters
        convs = zip(inputChannels, filters).map { cIn, cOut in
            ConvNorm<NormType>(cIn, cOut, ks: 3, stride: 2)
        }
        linear = FADense<Float>(lastFilterCount, nOut)
    }

    /// Applies the convolution stack, the pool and the dense layer to `input`.
    @differentiable
    public func forward(_ input: TF) -> TF {
        // TODO: Work around https://bugs.swift.org/browse/TF-606
        return linear.forward(pool.forward(convs(input)))
    }
}

: ignored