In [None]:
// %install '.package(path: "https://github.com/fastai/fastai_docs/dev_swift/FastaiNotebooks")' FastaiNotebooks

In [None]:
%install '.package(url: "https://github.com/mxcl/Path.swift", from: "0.16.1")' Path
%install '.package(url: "https://github.com/JustHTTP/Just", from: "0.7.1")' Just
%install '.package(url: "https://github.com/1024jp/GzipSwift", from: "4.1.0")' Gzip

Installing packages:
	.package(url: "https://github.com/mxcl/Path.swift", from: "0.16.1")
		Path
	.package(url: "https://github.com/JustHTTP/Just", from: "0.7.1")
		Just
	.package(url: "https://github.com/1024jp/GzipSwift", from: "4.1.0")
		Gzip
Working in: /tmp/tmps2icq5lq
Fetching https://github.com/mxcl/Path.swift
Fetching https://github.com/JustHTTP/Just
Fetching https://github.com/1024jp/GzipSwift
Completed resolution in 1.72s
Cloning https://github.com/JustHTTP/Just
Resolving https://github.com/JustHTTP/Just at 0.7.1
Cloning https://github.com/1024jp/GzipSwift
Resolving https://github.com/1024jp/GzipSwift at 4.1.0
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.2
Compile system-zlib anchor.c
Compile Swift Module 'Just' (1 sources)
Compile Swift Module 'Path' (9 sources)
Compile Swift Module 'Gzip' (1 sources)
Compile Swift Module 'jupyterInstalledPackages' (1 sources)
Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPa

In [None]:
import Foundation
import TensorFlow
import Just
import Gzip
import Path
import Python
print(Python.version)

3.6.7 (default, Oct 22 2018, 11:32:17) 
[GCC 8.2.0]


In [None]:
/// Downloads one MNIST file, decompresses it and decodes it into a `Float` tensor.
///
/// - Parameters:
///   - training: `true` selects the 60000-example "train" split, `false` the
///     10000-example "t10k" split.
///   - labels: `true` loads the label file, `false` loads the image file.
/// - Returns: For images, a `[batch, 28, 28]` tensor whose bytes are divided by 255.
///   For labels, a `[batch]` tensor holding the raw byte values, so that
///   expressions such as `trainY.max() + 1` see class indices rather than values
///   scaled like pixel intensities.
///
/// The function stops with `fatalError` (and a message naming the URL) when the
/// download yields no content or the payload cannot be gunzipped.
func loadMNIST(training: Bool, labels: Bool) -> Tensor<Float> {
    let split = training ? "train" : "t10k"
    let kind = labels ? "labels" : "images"
    let batch = training ? Int32(60000) : Int32(10000)
    let shape: TensorShape = labels ? [batch] : [batch, 28, 28]
    let rank = shape.rank
    // Number of leading header bytes to skip before the payload: 8 for label
    // files, 16 for image files (per the IDX layout used by MNIST).
    let dropK = labels ? 8 : 16
    let url = "http://yann.lecun.com/exdb/mnist/\(split)-\(kind)-idx\(rank)-ubyte.gz"

    guard let gzipped = Just.get(url).content else {
        fatalError("loadMNIST: no content received from \(url)")
    }
    let payload: Data
    do {
        payload = try gzipped.gunzipped()
    } catch {
        fatalError("loadMNIST: could not gunzip \(url): \(error)")
    }
    let data = payload.dropFirst(dropK)

    // Only pixel intensities are rescaled; label bytes are kept as-is.
    let scalars: [Float] = data.map { byte in
        labels ? Float(byte) : Float(byte) / Float(255.0)
    }
    return Tensor(scalars).reshaped(to: shape)
}

/// Loads the four MNIST tensors via `loadMNIST(training:labels:)`.
///
/// - Returns: A tuple ordered as (training images, training labels,
///   test images, test labels).
func loadMNIST() -> (Tensor<Float>, Tensor<Float>, Tensor<Float>, Tensor<Float>) {
    // Fetched one after another, in the same order as the returned tuple.
    let trainingImages = loadMNIST(training: true, labels: false)
    let trainingLabels = loadMNIST(training: true, labels: true)
    let testImages = loadMNIST(training: false, labels: false)
    let testLabels = loadMNIST(training: false, labels: true)
    return (trainingImages, trainingLabels, testImages, testLabels)
}

## Does nn.Conv2d init work well?

In [None]:
extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Returns this tensor with `mean` subtracted and the result divided by `std`.
    ///
    /// - Parameters:
    ///   - mean: Value subtracted from every element.
    ///   - std: Value the centered elements are divided by.
    func normalized(mean: Tensor, std: Tensor) -> Tensor {
        let centered = self - mean
        return centered / std
    }
}

In [None]:
// TODO: upstream this
extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Variance taken along every axis of the tensor, with all of those axes
    /// squeezed out of the result.
    func variance() -> Tensor {
        let everyAxis = [Int32](0..<rank)
        let reduced = variance(alongAxes: everyAxis)
        return reduced.squeezingShape(at: everyAxis)
    }
}

In [None]:
// Load the four MNIST tensors, then standardize both image sets using the
// mean and standard deviation computed from the training images only.
var (trainX, trainY, testX, testY) = loadMNIST()
let (trainX_mean, trainX_std) = (trainX.mean(), sqrt(trainX.variance()))
trainX = trainX.normalized(mean: trainX_mean, std: trainX_std)
testX = testX.normalized(mean: trainX_mean, std: trainX_std)

In [None]:
// Append a trailing axis of size 1 so both image tensors become [N, 28, 28, 1],
// then print the resulting shapes.
trainX = trainX.reshaped(to: [trainX.shape[0], 28, 28, 1])
testX = testX.reshaped(to: [testX.shape[0], 28, 28, 1])
print(trainX.shape, testX.shape)

TensorShape(dimensions: [60000, 28, 28, 1]) TensorShape(dimensions: [10000, 28, 28, 1])


In [None]:
// Size of the first dimension of trainX.
let images = trainX.shape[0]
// Largest value stored in trainY, plus one.
let classes = trainY.max() + 1
// Passed as the last element of layer1's filterShape.
let channels = 32

In [None]:
// Convolution layer under test. Only filterShape (5, 5, 1, channels) is given;
// strides, padding and activation are left to the initializer's defaults.
var layer1 = Conv2D<Float>(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)

In [None]:
// Sample batch used by the experiments: the first 100 entries of testX.
let x = testX[0..<100]

In [None]:
// Display the shape of the sample batch.
x.shape

▿ TensorShape
  ▿ dimensions : 4 elements
    - 0 : 100
    - 1 : 28
    - 2 : 28
    - 3 : 1


In [None]:
extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Summary statistics of the tensor.
    ///
    /// - Returns: A labeled tuple holding the result of `mean()` and the square
    ///   root of `variance()`.
    func stats() -> (mean: Tensor, std: Tensor) {
        let average = self.mean()
        let deviation = sqrt(self.variance())
        return (mean: average, std: deviation)
    }
}

In [None]:
// Mean and standard deviation of layer1's filter and bias as currently set.
(filter: layer1.filter.stats(), bias: layer1.bias.stats())

▿ 2 elements
  ▿ filter : 2 elements
    - mean : 0.0013559912
    - std : 0.048515525
  ▿ bias : 2 elements
    - mean : 0.0
    - std : 0.0


In [None]:
// Output of layer1 for the sample batch x.
let result = layer1.applied(to: x)

In [None]:
// Mean and standard deviation of `result`.
result.stats()

▿ 2 elements
  - mean : 0.0030350306
  - std : 0.27894023


In [None]:
extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Creates a tensor of the given shape filled with normally distributed values
    /// whose standard deviation is `gain / sqrt(fanIn)`.
    ///
    /// `gain` is `sqrt(2 / (1 + negativeSlope^2))`. `fanIn` is the second-to-last
    /// dimension of `shape` multiplied by the contiguous size of all dimensions
    /// before the last two.
    ///
    /// - Parameters:
    ///   - shape: Shape of the tensor to create.
    ///   - negativeSlope: Slope of the assumed leaky ReLU for negative inputs.
    init(kaimingNormal shape: TensorShape, negativeSlope: Double = 1.0) {
        // Assumes Leaky ReLU nonlinearity
        let slopeSquared = pow(negativeSlope, 2)
        let gain = Scalar((2.0 / (1.0 + slopeSquared)).squareRoot())

        // Everything before the last two dimensions counts as spatial.
        let leadingDims = shape.count - 2
        let kernelSize = shape[0..<leadingDims].contiguousSize
        let inputUnits = shape[shape.count - 2] * kernelSize

        let deviation = gain / Scalar(inputUnits).squareRoot()
        self.init(
            randomNormal: shape,
            stddev: deviation,
            generator: &PhiloxRandomNumberGenerator.global
        )
    }
}

In [None]:
// Redraw the filter with the Kaiming-normal initializer (negativeSlope 1.0) and
// show the statistics of layer1's output on x.
layer1.filter = Tensor(kaimingNormal: layer1.filter.shape, negativeSlope: 1.0)
layer1.applied(to: x).stats()

▿ 2 elements
  - mean : -0.009885282
  - std : 1.0364686


In [None]:
/// Leaky ReLU: keeps the non-negative part of `x` and scales the non-positive
/// part by `negativeSlope`.
///
/// - Parameters:
///   - x: Input tensor.
///   - negativeSlope: Multiplier applied to `min(0, x)`; the default of 0.0
///     leaves only `max(0, x)`.
/// - Returns: `max(0, x) + negativeSlope * min(0, x)`.
func leakyRelu<T: TensorFlowFloatingPoint>(
    _ x: Tensor<T>,
    negativeSlope: Double = 0.0
) -> Tensor<T> {
    let positivePart: Tensor<T> = max(0, x)
    let negativePart: Tensor<T> = min(0, x)
    return positivePart + T(negativeSlope) * negativePart
}

In [None]:
// Same experiment with negativeSlope 0.0, this time measuring the output after
// leakyRelu (whose own negativeSlope defaults to 0.0).
layer1.filter = Tensor(kaimingNormal: layer1.filter.shape, negativeSlope: 0.0)
leakyRelu(layer1.applied(to: x)).stats()

▿ 2 elements
  - mean : 0.54207695
  - std : 1.0349665


In [None]:
// Recreate layer1 with Conv2D's own initialization and repeat the leakyRelu
// measurement for comparison.
var layer1 = Conv2D<Float>(filterShape: (5, 5, 1, channels)) //Conv2D(1, nh, 5)
leakyRelu(layer1.applied(to: x)).stats()

▿ 2 elements
  - mean : 0.0999834
  - std : 0.1932733


In [None]:
// Display the shape of layer1's filter.
layer1.filter.shape

▿ TensorShape
  ▿ dimensions : 4 elements
    - 0 : 5
    - 1 : 5
    - 2 : 1
    - 3 : 32


In [None]:
// Treat every filter dimension before the last two as spatial and take the
// contiguous size of those dimensions as the receptive field.
let spatialDimCount = layer1.filter.rank - 2
let receptiveField = layer1.filter.shape[0..<spatialDimCount].contiguousSize
receptiveField

25


In [None]:
// The last two filter dimensions, named here as input and output filter counts.
let filtersIn = layer1.filter.shape[2]
let filtersOut = layer1.filter.shape[3]
print(filtersIn, filtersOut)

1 32


In [None]:
// Fan-in and fan-out: the filter count on each side times the receptive field.
let fanIn = filtersIn * receptiveField
let fanOut = filtersOut * receptiveField
print(fanIn, fanOut)

25 800


In [None]:
/// Gain factor for a leaky ReLU with the given negative slope:
/// `sqrt(2 / (1 + negativeSlope^2))`.
///
/// - Parameter negativeSlope: Slope applied to negative inputs.
/// - Returns: The gain as a `Double`.
func gain(_ negativeSlope: Double) -> Double {
    let denominator = 1.0 + pow(negativeSlope, 2.0)
    let ratio = 2.0 / denominator
    return ratio.squareRoot()
}

In [None]:
// Gains for several slopes. With slope sqrt(5) the gain is sqrt(2 / 6), i.e.
// 1 / sqrt(3).
// NOTE(review): sqrt(5) is presumably the slope used by PyTorch's nn.Conv2d
// default init — confirm.
(gain(1.0), gain(0.0), gain(0.01), gain(0.1), gain(sqrt(5.0)))

▿ 5 elements
  - .0 : 1.0
  - .1 : 1.4142135623730951
  - .2 : 1.4141428569978354
  - .3 : 1.4071950894605838
  - .4 : 0.5773502691896257


In [None]:
// Empirical standard deviation of 10000 uniform samples after mapping them with
// 2 * u - 1 (assumes randomUniform draws from the unit interval — TODO confirm).
sqrt((2 * Tensor<Float>(randomUniform: [10000]) - 1).variance())

0.57510734


In [None]:
// Closed-form standard deviation of a uniform distribution on (-1, 1), for
// comparison with the empirical value.
1.0 / sqrt(3.0)

0.5773502691896258


In [None]:
extension Tensor where Scalar: TensorFlowFloatingPoint {
    /// Creates a tensor of the given shape from uniform samples `u`, mapped to
    /// `bound * (2 * u - 1)` with `bound = sqrt(3) * gain / sqrt(fanIn)`.
    ///
    /// `gain` is `sqrt(2 / (1 + negativeSlope^2))`. `fanIn` is the second-to-last
    /// dimension of `shape` multiplied by the contiguous size of all dimensions
    /// before the last two.
    ///
    /// - Parameters:
    ///   - shape: Shape of the tensor to create.
    ///   - negativeSlope: Slope of the assumed leaky ReLU for negative inputs.
    init(kaimingUniform shape: TensorShape, negativeSlope: Double = 1.0) {
        // Assumes Leaky ReLU nonlinearity
        let slopeSquared = pow(negativeSlope, 2)
        let gain = Scalar((2.0 / (1.0 + slopeSquared)).squareRoot())

        // Everything before the last two dimensions counts as spatial.
        let leadingDims = shape.count - 2
        let kernelSize = shape[0..<leadingDims].contiguousSize
        let inputUnits = shape[shape.count - 2] * kernelSize

        let limit = Scalar(3.0).squareRoot() * gain / Scalar(inputUnits).squareRoot()
        let samples = Tensor(
            randomUniform: shape,
            generator: &PhiloxRandomNumberGenerator.global
        )
        self = limit * (2 * samples - 1)
    }
}

In [None]:
// Redraw the filter with the Kaiming-uniform initializer (negativeSlope 0.0) and
// show the statistics of the leakyRelu output on x.
layer1.filter = Tensor(kaimingUniform: layer1.filter.shape, negativeSlope: 0.0)
leakyRelu(layer1.applied(to: x)).stats()

▿ 2 elements
  - mean : 0.4965667
  - std : 0.893754


In [None]:
// Same measurement with negativeSlope sqrt(5), for which the gain is 1 / sqrt(3).
layer1.filter = Tensor(kaimingUniform: layer1.filter.shape, negativeSlope: sqrt(5.0))
leakyRelu(layer1.applied(to: x)).stats()

▿ 2 elements
  - mean : 0.20423418
  - std : 0.40728986


In [None]:
/// Four stride-2 convolutions followed by a flatten, used in this notebook to
/// inspect output and gradient statistics under different filter initializations.
struct Model: Layer {
    // 5x5 filter, 1 -> 8 channels, stride 2, `.same` padding, ReLU activation.
    var conv1 = Conv2D<Float>(
        filterShape: (5, 5, 1, 8),
        strides: (2, 2),
        padding: .same,
        activation: relu
    )
    // 3x3 filter, 8 -> 16 channels, stride 2, `.same` padding, ReLU activation.
    var conv2 = Conv2D<Float>(
        filterShape: (3, 3, 8, 16),
        strides: (2, 2),
        padding: .same,
        activation: relu
    )
    // 3x3 filter, 16 -> 32 channels, stride 2, `.same` padding, ReLU activation.
    var conv3 = Conv2D<Float>(
        filterShape: (3, 3, 16, 32),
        strides: (2, 2),
        padding: .same,
        activation: relu
    )
    // 3x3 filter, 32 -> 1 channel, stride 2, `.valid` padding; no activation
    // argument is passed, so the initializer's default applies.
    var conv4 = Conv2D<Float>(
        filterShape: (3, 3, 32, 1),
        strides: (2, 2),
        padding: .valid
    )
    var flatten = Flatten<Float>()
    /// Passes `input` through `conv1`, `conv2`, `conv3`, `conv4` and `flatten`.
    ///
    /// - Parameters:
    ///   - input: Batch fed to `conv1`.
    ///   - context: Forwarded unchanged to `sequenced(in:through:)`.
    /// - Returns: The output of `flatten`.
    // NOTE(review): presumably `sequenced` applies the layers left to right in
    // the order listed — confirm against the library.
    @differentiable
    func applied(to input: Tensor<Float>, in context: Context) -> Tensor<Float> {
        return input.sequenced(
            in: context,
            through: conv1, conv2, conv3, conv4, flatten
        )
    }
}

In [None]:
// Labels for the sample batch: x and y both take the first 100 test entries.
let y = testY[0..<100]
// Model with the layers' own (library default) initialization.
var model = Model()

In [None]:
// Forward pass on the sample batch, then the statistics of the output.
// NOTE(review): Model only declares applied(to:in:); the one-argument form is
// presumably supplied by the Layer protocol — confirm.
let prediction = model.applied(to: x)
prediction.stats()

▿ 2 elements
  - mean : -0.05856763
  - std : 0.09596065


In [None]:
// Gradient of the mean squared error between the model output on x (evaluated
// in the training learning phase) and y, taken with respect to the model.
// NOTE(review): y is rank 1 (100 labels) while the flattened model output
// presumably has a trailing axis — confirm the loss broadcasts as intended.
let gradients = gradient(at: model) { model in
    meanSquaredError(predicted: model.applied(
        to: x,
        in: Context(learningPhase: .training)
    ), expected: y)
}
// Blocked by TF-417
// NOTE(review): the recorded output of this cell is ": ignored", so the
// statistics below were presumably never produced.
gradients.conv1.filter.stats()

: ignored

In [None]:
// Replace the filter of each of the four conv layers with a Kaiming-uniform
// tensor of the same shape. No negativeSlope is passed, so the initializer's
// default (1.0) is used.
for keyPath in [\Model.conv1, \Model.conv2, \Model.conv3, \Model.conv4] {
    model[keyPath: keyPath].filter = Tensor(kaimingUniform: model[keyPath: keyPath].filter.shape)
}

In [None]:
// Forward pass on the sample batch with the re-initialized filters, then the
// statistics of the output.
let prediction = model.applied(to: x)
prediction.stats()

▿ 2 elements
  - mean : -1.596702
  - std : 0.6037773


In [None]:
// Gradient of the mean squared error between the model output on x (evaluated
// in the training learning phase) and y, taken with respect to the model after
// its filters were re-initialized.
let gradients = gradient(at: model) { model in
    meanSquaredError(predicted: model.applied(
        to: x,
        in: Context(learningPhase: .training)
    ), expected: y)
}
// Blocked by TF-417
// NOTE(review): the recorded output of this cell is ": ignored", so the
// statistics below were presumably never produced.
gradients.conv1.filter.stats()

: ignored

## Export

In [None]:
!./notebook2script.py 02_fully_connected.ipynb

Converted 02_fully_connected.ipynb to nb_02.py
