In [1]:
%install '.package(path: "~/gitrepos/TimeMagic")' TimeMagic
%install '.package(url: "https://github.com/mxcl/Path.swift", from: "0.16.1")' Path

Installing packages:
	.package(path: "~/gitrepos/TimeMagic")
		TimeMagic
	.package(url: "https://github.com/mxcl/Path.swift", from: "0.16.1")
		Path
With SwiftPM flags: []
Working in: /tmp/tmpdzaaa8jq
Fetching https://github.com/mxcl/Path.swift
Completed resolution in 1.27s
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.2
Compile Swift Module 'TimeMagic' (1 sources)
Compile Swift Module 'Path' (9 sources)
Compile Swift Module 'jupyterInstalledPackages' (1 sources)
Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so
Initializing Swift...
Loading library...
Installation complete!


In [2]:
import TimeMagic
import Path
import TensorFlow
import Foundation

In [3]:
func loadData(path: String, shape: [Int32], is_label: Bool) -> Tensor<Float> {
    let dropK: Int = (is_label ? 8 : 16)
    let data = try! Data.init(contentsOf: 
                     URL.init(fileURLWithPath: path)
                    ).dropFirst(dropK)
    let tensorShape = TensorShape.init(shape)
    return Tensor(data.map(Float.init)).reshaped(to: tensorShape)
}

In [4]:
let basepath = Path.home/".fastai"/"data"/"mnist"
let trnImgs = "train-images-idx3-ubyte"
let trnLbls = "train-labels-idx1-ubyte"
let valImgs = "t10k-images-idx3-ubyte"
let valLbls = "t10k-labels-idx1-ubyte" 

In [5]:
var xTrain: Tensor<Float> = loadData(path: (basepath/trnImgs).string,
                   shape: [60000, 28, 28, 1],
                   is_label: false)/255

In [6]:
let yTrain: Tensor<Float> = loadData(path: (basepath/trnLbls).string,
                   shape: [60000],
                   is_label: true)

In [7]:
var xValid: Tensor<Float> = loadData(path: (basepath/valImgs).string,
                   shape: [10000, 28, 28, 1],
                   is_label: false)/255

In [8]:
let yValid: Tensor<Float> = loadData(path: (basepath/valLbls).string,
                   shape: [10000],
                   is_label: true)

In [9]:
public extension Tensor where Scalar : TensorFlowFloatingPoint {
    func stddev(alongAxes axes: [Int32])-> Tensor<Scalar>{
        let mean = self.mean(alongAxes: axes)
        return sqrt((self - mean).squared().mean(alongAxes: axes))
    }
    
    func stddev()-> Tensor<Scalar>{
        let mean = self.mean()
        return sqrt((self - mean).squared().mean())
    }
}

In [10]:
func normalize<Scalar: TensorFlowFloatingPoint>(
    _ x: Tensor<Scalar>, _ mean: Tensor<Scalar>? = nil, _ stddev: Tensor<Scalar>? = nil
) ->  Tensor<Scalar>{
    var mean = (mean ?? x.mean())
    var stddev = (stddev ?? x.stddev())
    return (x-mean)/stddev
}

In [11]:
var xTrainNormal: Tensor<Float> = normalize(xTrain)
var xValidNormal: Tensor<Float> = normalize(
    xValid,
    xTrain.mean(),
    xTrain.stddev()
)

In [12]:
var l1 = Conv2D<Float>(filterShape: (5, 5, 1, 32) )

In [13]:
l1.filter.shape

▿ TensorShape
  ▿ dimensions : 4 elements
    - 0 : 5
    - 1 : 5
    - 2 : 1
    - 3 : 32


In [14]:
l1.bias.shape

▿ TensorShape
  ▿ dimensions : 1 element
    - 0 : 32


In [15]:
var x = xValidNormal[0..<100]

In [16]:
x.shape

▿ TensorShape
  ▿ dimensions : 4 elements
    - 0 : 100
    - 1 : 28
    - 2 : 28
    - 3 : 1


In [17]:
func stats<Scalar: TensorFlowFloatingPoint>(
    _ x: Tensor<Scalar>
) -> (mean: Tensor<Scalar>, stddev: Tensor<Scalar>){
    return (mean: x.mean(), stddev: x.stddev())
}

In [18]:
stats(x)

▿ 2 elements
  - mean : -0.034978602
  - stddev : 0.960318


In [19]:
stats(l1.filter)

▿ 2 elements
  - mean : -0.0013731996
  - stddev : 0.049344175


In [20]:
stats(l1.bias)

▿ 2 elements
  - mean : 0.0
  - stddev : 0.0


In [21]:
//for some reason max(Scalar, Tensor<Scalar>) is non-differentiable
//...not in real life, just according to Swift. so i am doing max(Tensor, Tensor)
@differentiable(wrt: x)
func leakyRelu<Scalar: TensorFlowFloatingPoint>(
    _ x: Tensor<Scalar>,
    _ negativeSlope: Scalar=0.01
) -> Tensor<Scalar> {
    return max(Tensor(zeros:x.shape), x) + negativeSlope * min(Tensor(zeros:x.shape), x)
}

In [22]:
public extension Tensor where Scalar : TensorFlowFloatingPoint {
    //can handle relu and leakyRelu activations
    //note: pytorch does neg slope 0.01 for leakyRelu and 0 for relu
    init(kaimingNormal shape: TensorShape, negativeSlope: Scalar=0){
        let gain: Scalar = (negativeSlope==0) ? sqrt(2.0) : sqrt(2.0/(1.0 + negativeSlope*negativeSlope))
        
        // see kaiming_normal_ in:
        // torch/nn/init.py
        let filterDimRank = shape.rank - 2
        let receptiveField: Scalar = Scalar(shape[0..<filterDimRank].contiguousSize)
        let inputFmaps: Scalar = Scalar(shape[shape.rank - 2])
//         let outputFmaps: Scalar = Scalar(shape[shape.rank - 1])
        let fanIn: Scalar = receptiveField * inputFmaps
//         let fanOut: Scalar = receptiveField * outputFmaps
        self = Tensor(randomNormal: shape) * gain/sqrt(fanIn)
    }
}

In [23]:
stats(leakyRelu(l1.applied(to: x),0))

▿ 2 elements
  - mean : 0.09033453
  - stddev : 0.15971895


In [24]:
l1.filter = Tensor<Float>(kaimingNormal: l1.filter.shape, negativeSlope: 1)

In [25]:
stats(l1.applied(to: x))

▿ 2 elements
  - mean : -0.00936679
  - stddev : 1.0865078


In [26]:
l1.filter = Tensor<Float>(kaimingNormal: l1.filter.shape, negativeSlope: 0)

In [27]:
stats(leakyRelu(l1.applied(to: x),0))

▿ 2 elements
  - mean : 0.50959545
  - stddev : 0.9501919


In [28]:
l1.filter.shape

▿ TensorShape
  ▿ dimensions : 4 elements
    - 0 : 5
    - 1 : 5
    - 2 : 1
    - 3 : 32


In [29]:
var receptiveField = l1.filter.shape[0..<2].contiguousSize
print(receptiveField)

25


In [30]:
var ni = l1.filter.shape.dimensions[2]
var nf = l1.filter.shape.dimensions[3]
print(ni, nf)

1 32


In [31]:
var fanIn = receptiveField * ni
var fanOut = receptiveField * nf
print(fanIn, fanOut)

25 800


In [32]:
func gain<Scalar: FloatingPoint>(
    _ negativeSlope: Scalar
) -> Scalar {
    return sqrt(2/(1 + negativeSlope * negativeSlope))
}

In [33]:
print(gain(1.0), gain(0), gain(0.01), gain(0.1), gain(sqrt(5.0)))

1.0 1.4142135623730951 1.4141428569978354 1.4071950894605838 0.5773502691896257


In [34]:
var rand = (Tensor<Float>(randomUniform: [10000]) * 2 - 1)
print(rand.min(), rand.max(), rand.stddev())

-0.9999883 0.99982643 0.5776051


In [35]:
1/sqrt(3)

0.5773502691896258


In [36]:
public extension Tensor where Scalar : TensorFlowFloatingPoint {
    //can handle relu and leakyRelu activations
    //note: pytorch does neg slope 0.01 for leakyRelu and 0 for relu
    init(kaimingUniform shape: TensorShape, negativeSlope: Scalar=0){
        let gain: Scalar = (negativeSlope==0) ? sqrt(2.0) : sqrt(2.0/(1.0 + negativeSlope*negativeSlope))
        
        // see kaiming_uniform_ in:
        // torch/nn/init.py
        let filterDimRank = shape.rank - 2
        let receptiveField: Scalar = Scalar(shape[0..<filterDimRank].contiguousSize)
        let inputFmaps: Scalar = Scalar(shape[shape.rank - 2])
//         let outputFmaps: Scalar = Scalar(shape[shape.rank - 1])
        let fanIn: Scalar = receptiveField * inputFmaps
//         let fanOut: Scalar = receptiveField * outputFmaps
        let bound: Scalar = gain/sqrt(fanIn) * sqrt(3)
        self = (Tensor(randomUniform: shape) * 2 - 1) * bound
    }
}

In [37]:
l1.filter = Tensor<Float>(kaimingUniform: l1.filter.shape, negativeSlope: 0.0)
stats(leakyRelu(l1.applied(to: x),0))

▿ 2 elements
  - mean : 0.5696951
  - stddev : 1.0864445


In [38]:
l1.filter = Tensor<Float>(kaimingUniform: l1.filter.shape, negativeSlope: sqrt(5))
stats(leakyRelu(l1.applied(to: x),0))

▿ 2 elements
  - mean : 0.21802388
  - stddev : 0.3621719


TODO: create adaptiveaveragepooling.

looks like this is pretty doable since the tensor extension averagePooled exists

In [39]:
public extension Conv2D where Scalar : TensorFlowFloatingPoint {
    init(
        kaimingUniform filterShape: (Int, Int, Int, Int),
        strides: (Int, Int) = (3, 3),
        padding: Padding = .valid,
        activation: @escaping Activation = identity,
        negativeSlope: Scalar = 0
        ){
        self = Conv2D<Scalar>(filterShape: filterShape, strides: strides, padding: padding, activation: activation)
        self.filter = Tensor<Scalar>(kaimingUniform: self.filter.shape, negativeSlope: negativeSlope)
    }
}

In [42]:
public struct Model<Scalar: TensorFlowFloatingPoint>: Layer {

    public var conv1 = Conv2D<Scalar>(kaimingUniform: (5, 5, 1, 8), strides: (2,2), padding: .same, activation: relu)
    public var conv2 = Conv2D<Scalar>(kaimingUniform: (3, 3, 8, 16), strides: (2,2), padding: .same, activation: relu)
    public var conv3 = Conv2D<Scalar>(kaimingUniform: (3, 3, 16, 32), strides: (2,2), padding: .same, activation: relu)
    public var conv4 = Conv2D<Scalar>(kaimingUniform: (3, 3, 32, 1), strides: (2,2), padding: .valid)
    // TODO: create adaptiveaveragepool
    public var flatten = Flatten<Scalar>()

    @differentiable
    public func applied(to input: Tensor<Scalar>, in context: Context) -> Tensor<Scalar> {
        return input.sequenced(in: context, through: conv1, conv2, conv3, conv4, flatten)
    }
}

In [51]:
var model = Model<Float>()

In [52]:
var preds = model.applied(to: x)
stats(preds)

▿ 2 elements
  - mean : 0.74558085
  - stddev : 0.6005364


In [55]:
var y = yValid[0..<100]

In [58]:
meanSquaredError(predicted: preds, expected: y)

21.024893


In [60]:
var context = Context(learningPhase: .training)

var (preds, backprop) = model.appliedForBackpropagation(to: x, in: context)
var (loss, grad) = preds.valueWithGradient{preds in
                    meanSquaredError(predicted: preds,  expected: y)
                    }
var (𝛁model, _) = backprop(grad)

In [63]:
stats(𝛁model.conv1.filter)

▿ 2 elements
  - mean : -0.023879936
  - stddev : 0.7107346
