# Lesson 8 - Part 02

This is a reimplementation of fastai part 2 version 3 in Swift.
https://github.com/fastai/fastai_docs/blob/master/dev_course/dl2/02_fully_connected.ipynb

Note: this notebook requires my fork of [swift-jupyter](https://github.com/metachi/swift-jupyter) and a clone of my [TimeMagic](https://github.com/metachi/TimeMagic) repo for the ```%%time``` and ```%%timeit``` magic commands to work.

In [1]:
%install '.package(path: "~/gitrepos/TimeMagic")' TimeMagic
%install '.package(url: "https://github.com/mxcl/Path.swift", from: "0.16.1")' Path
%install '.package(url: "https://github.com/JustHTTP/Just", from: "0.7.1")' Just

Installing packages:
	.package(path: "~/gitrepos/TimeMagic")
		TimeMagic
	.package(url: "https://github.com/mxcl/Path.swift", from: "0.16.1")
		Path
	.package(url: "https://github.com/JustHTTP/Just", from: "0.7.1")
		Just
Fetching https://github.com/mxcl/Path.swift
Fetching https://github.com/JustHTTP/Just
Completed resolution in 1.35s
Cloning https://github.com/mxcl/Path.swift
Resolving https://github.com/mxcl/Path.swift at 0.16.2
Cloning https://github.com/JustHTTP/Just
Resolving https://github.com/JustHTTP/Just at 0.7.1
Compile Swift Module 'TimeMagic' (1 sources)
Compile Swift Module 'Just' (1 sources)
Compile Swift Module 'Path' (9 sources)
Compile Swift Module 'jupyterInstalledPackages' (1 sources)
Linking ./.build/x86_64-unknown-linux/debug/libjupyterInstalledPackages.so
Installation complete!

In [2]:
import TimeMagic
import Foundation
import Path
import Just

### Download Dataset

In [3]:
print(Path.home/".fastai"/"data"/"test.txt")

/home/jeff/.fastai/data/test.txt


In [4]:
/// Downloads the resource at `url` and writes the response body to the file at `dest`.
///
/// Redirects are not followed. Failures are reported on standard output instead of
/// being thrown, so callers cannot distinguish success from failure by return value.
///
/// - Parameters:
///   - url: Address of the resource to fetch.
///   - dest: File-system path the response body is written to.
public func download(_ url: String, dest: String) {
    let response = Just.get(url, allowRedirects: false)

    // Bail out instead of force-unwrapping when the request failed or carried no body.
    guard response.ok, let body = response.content else {
        let status = response.statusCode.map { String($0) } ?? "no response"
        print("error downloading \(url) (status: \(status))")
        return
    }

    do {
        try body.write(to: URL(fileURLWithPath: dest))
    } catch {
        // Surface the underlying reason so a bad path or full disk is diagnosable.
        print("error downloading \(url): could not write to \(dest): \(error)")
    }
}

In [5]:
// Directory URL the MNIST files are fetched from (note the trailing slash).
let base = "http://yann.lecun.com/exdb/mnist/"
// Names of the training image/label files.
let trn_imgs = "train-images-idx3-ubyte"
let trn_lbls = "train-labels-idx1-ubyte"
// Names of the 10k-example image/label files, used as the validation set below.
let val_imgs = "t10k-images-idx3-ubyte"
let val_lbls = "t10k-labels-idx1-ubyte" 

In [6]:
let dest = Path.home/".fastai"/"data"/"mnist"

In [7]:
// Create the dataset directory on first run.
// NOTE(review): presumably `mkdir()` only creates the last path component — confirm that
// ~/.fastai/data already exists, or that this call also creates missing parents.
if !dest.exists{
    dest.mkdir()
}

In [8]:
// Fetch every MNIST archive whose unpacked file is not already present in `dest`.
for fileName in [trn_imgs, trn_lbls, val_imgs, val_lbls] {
    let destPath = dest/fileName
    if !destPath.exists {
        // `base` already ends with "/", so no extra separator is inserted.
        // The archives are published with a ".gz" suffix and are saved under the
        // same suffix; they still have to be gunzipped before they can be loaded.
        download("\(base)\(fileName).gz", dest: destPath.string + ".gz")
    }
}

In [9]:
for n in dest.ls() {
    print(n.path)
}

/home/jeff/.fastai/data/mnist/t10k-labels-idx1-ubyte
/home/jeff/.fastai/data/mnist/train-labels-idx1-ubyte
/home/jeff/.fastai/data/mnist/train-images-idx3-ubyte
/home/jeff/.fastai/data/mnist/t10k-images-idx3-ubyte


Gunzip the downloaded `.gz` files before continuing. The listing below should then show the uncompressed files.

In [10]:
for n in dest.ls() {
    print(n.path)
}

/home/jeff/.fastai/data/mnist/t10k-labels-idx1-ubyte
/home/jeff/.fastai/data/mnist/train-labels-idx1-ubyte
/home/jeff/.fastai/data/mnist/train-images-idx3-ubyte
/home/jeff/.fastai/data/mnist/t10k-images-idx3-ubyte


### Load the dataset

In [11]:
import TensorFlow

See [Yann LeCun's site](http://yann.lecun.com/exdb/mnist/) for why we have to drop the first few bytes of each file: they are the file header (8 bytes for label files, 16 bytes for image files). See the "TRAINING SET LABEL FILE" heading and the similarly named headings.

In [12]:
/// Reads a raw (already gunzipped) MNIST file and returns its payload as a float tensor.
///
/// The leading header bytes are skipped and every remaining byte becomes one `Float`.
/// The process is terminated with a message naming the file if it cannot be read,
/// because the function's signature offers no way to report the error to the caller.
///
/// - Parameters:
///   - path: File-system path of the file to read.
///   - shape: Shape the resulting tensor is reshaped to.
///   - is_label: `true` for a label file (8 header bytes), `false` for an image file (16 header bytes).
/// - Returns: A tensor of shape `shape` holding the file's byte values as floats.
func loadData(path: String, shape: [Int32], is_label: Bool) -> Tensor<Float> {
    let headerByteCount = is_label ? 8 : 16

    let fileContents: Data
    do {
        fileContents = try Data(contentsOf: URL(fileURLWithPath: path))
    } catch {
        // A missing or unreadable file is an environment problem, not a logic bug:
        // say which file failed instead of crashing on a bare `try!`.
        fatalError("loadData: unable to read \(path): \(error)")
    }

    let scalars = fileContents.dropFirst(headerByteCount).map(Float.init)
    return Tensor(scalars).reshaped(to: TensorShape(shape))
}

In [13]:
var xTrain: Tensor<Float> = loadData(path: (dest/trn_imgs).string,
                   shape: [60000, 784],
                   is_label: false)/255

In [14]:
let yTrain: Tensor<Float> = loadData(path: (dest/trn_lbls).string,
                   shape: [60000],
                   is_label: true)

In [15]:
var xValid: Tensor<Float> = loadData(path: (dest/val_imgs).string,
                   shape: [10000, 784],
                   is_label: false)/255

In [16]:
let yValid: Tensor<Float> = loadData(path: (dest/val_lbls).string,
                   shape: [10000],
                   is_label: true)

### The forward and backward passes

In [17]:
public extension Tensor where Scalar : FloatingPoint {
    /// Standard deviation along the given axes.
    ///
    /// Computed as the square root of the mean squared deviation from the mean
    /// (the squared deviations are averaged, not divided by `count - 1`).
    ///
    /// - Parameter axes: Axes to reduce over.
    func stddev(alongAxes axes: [Int32]) -> Tensor<Scalar> {
        let centered = self - mean(alongAxes: axes)
        let meanSquaredDeviation = centered.squared().mean(alongAxes: axes)
        return sqrt(meanSquaredDeviation)
    }

    /// Standard deviation over every element of the tensor, computed as the
    /// square root of the mean squared deviation from the overall mean.
    func stddev() -> Tensor<Scalar> {
        let centered = self - mean()
        let meanSquaredDeviation = centered.squared().mean()
        return sqrt(meanSquaredDeviation)
    }
}

In [18]:
/// Shifts and scales `x` to `(x - mean) / stddev`.
///
/// - Parameters:
///   - x: Tensor to normalize.
///   - mean: Value to subtract; defaults to the mean over all elements of `x`.
///   - stddev: Value to divide by; defaults to the standard deviation over all elements of `x`.
/// - Returns: The normalized tensor.
func normalize<Scalar: FloatingPoint>(_ x: Tensor<Scalar>, _ mean: Tensor<Scalar>? = nil, _ stddev: Tensor<Scalar>? = nil) ->  Tensor<Scalar>{
    // Resolved statistics are never mutated, so bind them immutably and under
    // names that do not shadow the optional parameters.
    let center = mean ?? x.mean()
    let scale = stddev ?? x.stddev()
    return (x - center) / scale
}

In [19]:
// Normalize the training inputs with their own mean and standard deviation.
var xTrainNormal: Tensor<Float> = normalize(xTrain)
// The validation inputs reuse the *training* statistics so both sets go through
// the same transformation. `stddev()` is the same helper `normalize` uses for the
// training data, which keeps the two computations consistent.
var xValidNormal: Tensor<Float> = normalize(xValid,
                                            xTrain.mean(),
                                            xTrain.stddev()
                                           )

In [20]:
/// Returns `true` when every element of `x` differs from the corresponding
/// element of `y` by strictly less than `tolerance`.
func almostEqual<Scalar: SignedNumeric & FloatingPoint>(_ x: Tensor<Scalar>, _ y: Tensor<Scalar>, _ tolerance: Tensor<Scalar>) -> Bool{
    let distance = abs(x - y)
    let isWithinTolerance = distance .< tolerance
    return isWithinTolerance.all()
}

In [21]:
print(xTrainNormal.mean(), xTrainNormal.variance(alongAxes: [0,1]))

-1.4449139e-08 [[1.0000017]]


In [22]:
almostEqual(xTrainNormal.mean(), Tensor(0), Tensor(1e-3))

true


In [23]:
almostEqual(sqrt(xTrainNormal.variance(alongAxes: [0,1])),
            Tensor(1),
            Tensor(1e-3))

true


The validation set's mean should be near 0 and its variance near 1.

In [24]:
print(xValidNormal.mean(), xValidNormal.variance(alongAxes: [0,1]))

0.0060177604 [[1.0154601]]


In [25]:
almostEqual(xValidNormal.mean(), Tensor(0), Tensor(1e-2))

true


In [26]:
almostEqual(sqrt(xValidNormal.variance(alongAxes: [0,1])),
            Tensor(1),
            Tensor(1e-2))

true


In [27]:
// Dataset dimensions, read from the normalized training inputs.
var shp = xTrainNormal.shape
// Size of the first axis (training examples) and second axis (values per example).
let n = shp[0]
let m = shp[1]
// Largest label value plus one; this is a Float tensor because yTrain is Tensor<Float>.
let c = yTrain.max() + 1
print(n, m, c)

60000 784 10.0


## Foundations version

### Basic architecture

In [28]:
var nh: Int32 = 50

In [29]:
public extension Tensor where Scalar : BinaryFloatingPoint {
    /// Creates a tensor of standard-normal samples divided by `sqrt(shape[0])`.
    ///
    /// - Parameter shape: Shape of the tensor; its first dimension is used as the scaling factor.
    init(simpleKaiming shape: TensorShape){
        let fanIn = Scalar(shape[0])
        self.init(Tensor(randomNormal: shape) / sqrt(fanIn))
    }

    /// Creates a tensor of standard-normal samples multiplied by `sqrt(2 / shape[0])`.
    ///
    /// - Parameter shape: Shape of the tensor; its first dimension is used as the scaling factor.
    init(kaiming shape: TensorShape){
        let fanIn = Scalar(shape[0])
        let gain = sqrt(2 / fanIn)
        self.init(Tensor(randomNormal: shape) * gain)
    }
}

In [30]:
// Simplified Kaiming/He-style init: standard-normal weights divided by sqrt(first dimension).
// Biases start at zero.
var w1 = Tensor<Float>(simpleKaiming: [m, nh])
var b1 = Tensor<Float>(zeros: [nh])
var w2 = Tensor<Float>(simpleKaiming: [nh, 1])
var b2 = Tensor<Float>(zeros: [1])

In [31]:
/// Linear layer: the matrix product of `x` and `w`, plus the bias `b`.
func lin<Scalar: Numeric>(_ x: Tensor<Scalar>, _ w: Tensor<Scalar>, _ b: Tensor<Scalar>) -> Tensor<Scalar>{
    let product = matmul(x, w)
    return product + b
}

In [32]:
// s4tf already ships a relu function; this hand-written one is for illustration.
/// Element-wise maximum of `x` and zero.
func myRelu<Scalar: Numeric & Comparable>(_ x: Tensor<Scalar>) -> Tensor<Scalar> {
    let clamped: Tensor<Scalar> = max(0, x)
    return clamped
}

In [33]:
var t = lin(xValidNormal, w1, b1)

In [34]:
print(t.mean(), t.stddev())

-0.043281157 1.0337342


In [35]:
var t = myRelu(lin(xValidNormal, w1, b1))

In [36]:
print(t.mean(), t.stddev())

0.38466653 0.58239657


In [37]:
var w1 = Tensor<Float>(kaiming: [m, nh])

In [38]:
print(w1.mean(), w1.stddev())

-0.00040210917 0.05050641


In [39]:
var t = myRelu(lin(xValidNormal, w1, b1))

In [40]:
print(t.mean(), t.stddev())

0.5217546 0.79198986


In [41]:
// s4tf already ships a relu function; this variant additionally shifts the output.
/// Element-wise maximum of `x` and zero, with 0.5 subtracted from the result.
func myRelu<Scalar: BinaryFloatingPoint & Comparable>(_ x: Tensor<Scalar>) -> Tensor<Scalar> {
    let clamped: Tensor<Scalar> = max(0, x)
    return clamped - 0.5
}

In [42]:
var w1 = Tensor<Float>(kaiming: [m, nh])
var t1 = myRelu(lin(xValidNormal, w1, b1))
print(t1.mean(), t1.stddev())

0.012894014 0.82064474


In [43]:
/// Two-layer network built from the notebook-level parameters:
/// linear (`w1`, `b1`), then relu, then linear (`w2`, `b2`).
///
/// - Parameter xb: Batch of inputs.
/// - Returns: Output of the second linear layer.
func myModel(_ xb: Tensor<Float>) -> Tensor<Float> {
    let hidden = relu(lin(xb, w1, b1))
    return lin(hidden, w2, b2)
}

In [44]:
%%timeit 100
var _ = myModel(xValidNormal)

Max: 414.641 µs
Min: 314.73 µs
Mean: 364.82986999999997 µs
Std Dev: 26.718132256448985 µs


In [45]:
xValidNormal.shape

▿ TensorShape
  ▿ dimensions : 2 elements
    - 0 : 10000
    - 1 : 784


In [46]:
myModel(xValidNormal).shape

▿ TensorShape
  ▿ dimensions : 2 elements
    - 0 : 10000
    - 1 : 1


In [47]:
yValid.shape

▿ TensorShape
  ▿ dimensions : 1 element
    - 0 : 10000


### Loss function: MSE

In [48]:
/// Mean squared error between `pred` (with its last axis squeezed away) and `target`.
func mse(_ pred: Tensor<Float>, _ target: Tensor<Float>)->Tensor<Float>{
    let flatPred = pred.squeezingShape(at: -1)
    let residual = flatPred - target
    return residual.squared().mean()
}

In [49]:
var preds = myModel(xTrainNormal)

In [50]:
mse(preds, yTrain)

30.527683


### Gradients and backward pass

In [95]:
/// Reference-type box pairing a tensor with a gradient slot of the same shape,
/// so the hand-written backward functions can store gradients in place.
public class TensorWithGrad<Scalar: TensorFlowNumeric>{
    /// The wrapped value.
    var tensor: Tensor<Scalar>
    /// Gradient slot; starts as zeros shaped like `tensor`.
    var g: Tensor<Scalar>

    /// Wraps `x` and initializes the gradient slot to zeros of the same shape.
    init (_ x: Tensor<Scalar>){
        self.tensor = x
        self.g = Tensor<Scalar>(zeros: x.shape)
    }
}

In [105]:
/// Linear layer on wrapped tensors: wraps `matmul(x, w) + b` in a fresh `TensorWithGrad`.
func lin<Scalar: Numeric>(_ x: TensorWithGrad<Scalar>, _ w: TensorWithGrad<Scalar>, _ b: TensorWithGrad<Scalar>) -> TensorWithGrad<Scalar>{
    let activation = matmul(x.tensor, w.tensor) + b.tensor
    return TensorWithGrad(activation)
}

In [104]:
/// ReLU on a wrapped tensor: wraps the element-wise maximum of `x.tensor` and zero
/// in a fresh `TensorWithGrad`.
func relu<Scalar: Numeric & Comparable>(_ x: TensorWithGrad<Scalar>) -> TensorWithGrad<Scalar> {
    let clamped: Tensor<Scalar> = max(0, x.tensor)
    return TensorWithGrad(clamped)
}

In [110]:
/// Mean squared error between the wrapped prediction (last axis squeezed away) and `target`.
func mse(_ pred: TensorWithGrad<Float>, _ target: Tensor<Float>)->Tensor<Float>{
    let flatPred = pred.tensor.squeezingShape(at: -1)
    let residual = flatPred - target
    return residual.squared().mean()
}

In [96]:
/// Stores the gradient of the mean squared error with respect to `inp` in `inp.g`.
///
/// The gradient is `2 * (inp - target) / count`, where `count` is the total number
/// of scalars in `inp.tensor`. The last axis of `inp.tensor` is squeezed for the
/// subtraction and restored afterwards.
func mseGrad<Scalar: Numeric & Comparable>(_ inp: TensorWithGrad<Scalar>, _ target: TensorWithGrad<Scalar>){
    let residual = inp.tensor.squeezingShape(at: -1) - target.tensor
    let count = Tensor<Scalar>(inp.tensor.scalarCountTensor)
    inp.g = 2 * residual.expandingShape(at: -1) / count
}

In [97]:
/// Backward pass of ReLU: stores `out.g`, masked to the positions where
/// `inp.tensor` is strictly positive, in `inp.g`.
func reluGrad<Scalar: Numeric & Comparable>(_ inp: TensorWithGrad<Scalar>, _ out: TensorWithGrad<Scalar>){
    let positiveMask = Tensor<Scalar>(inp.tensor .> 0)
    inp.g = positiveMask * out.g
}

In [98]:
/// Backward pass of the linear layer `out = matmul(inp, w) + b`.
///
/// Reads `out.g` and writes three gradients:
/// `inp.g = out.g · wᵀ`, `w.g = inpᵀ · out.g`, and `b.g` = `out.g` summed over axis 0.
func linGrad<Scalar: Numeric & Comparable>(_ inp: TensorWithGrad<Scalar>,
                                           _ out: TensorWithGrad<Scalar>,
                                           _ w: TensorWithGrad<Scalar>,
                                           _ b: TensorWithGrad<Scalar>){
    let weightsTransposed = w.tensor.transposed()
    let inputTransposed = inp.tensor.transposed()

    inp.g = matmul(out.g, weightsTransposed)
    w.g = matmul(inputTransposed, out.g)
    b.g = out.g.sum(squeezingAxes: 0)
}

In [108]:
// Wrap the normalized training inputs and the four parameters so that each one
// carries a gradient slot (`g`) for the hand-written backward pass.
var inpt = TensorWithGrad(xTrainNormal)
var w1t = TensorWithGrad(w1)
var b1t = TensorWithGrad(b1)
var w2t = TensorWithGrad(w2)
var b2t = TensorWithGrad(b2)

In [112]:
/// Runs one forward pass and one hand-written backward pass through the two-layer model.
///
/// Uses the notebook-level wrapped parameters `w1t`, `b1t`, `w2t`, `b2t`. After the
/// call, their `g` properties (and `inp.g`) hold the gradients of the MSE loss.
///
/// - Parameters:
///   - inp: Wrapped batch of inputs.
///   - target: Target values for the batch.
func forwardBackward(_ inp: TensorWithGrad<Float>, _ target: Tensor<Float>){
    // Forward pass.
    let l1 = lin(inp, w1t, b1t)
    let l2 = relu(l1)
    let out = lin(l2, w2t, b2t)
    // The backward pass never reads the loss value, so it is evaluated and
    // discarded explicitly rather than bound to an unused local.
    _ = mse(out, target)

    // Backward pass: walk the layers in reverse, each step filling in `.g`.
    mseGrad(out, TensorWithGrad(target))
    linGrad(l2, out, w2t, b2t)
    reluGrad(l1, l2)
    linGrad(inp, l1, w1t, b1t)
}

In [113]:
forwardBackward(inpt, yTrain)

In [115]:
w1t.g[0]

[0.036845412, 0.15428698, 0.61514556, 0.0026182751, -0.03594636, 0.23792759, -0.012913689, -0.06650702, -0.11044304, 0.6742586, -0.042122237, -0.08594383, -0.1363779, 0.09261866, 0.2736918, -0.008390956, 0.049746882, 0.19855468, 0.64887106, -0.12221576, -0.06695481, -0.27904707, 0.055384487, 0.13180414, 0.17950325, -0.31356674, -0.13516948, 0.023555035, 0.15248163, -0.09164139, 0.07572657, -0.2633806, -0.5097568, -0.19652382, 0.017543625, -0.54182184, -0.029417358, -0.22133367, 0.026922356, 0.032767624, 0.02023641, -0.3924407, 0.0483596, -0.062351465, -0.3280657, 0.0010294362, -0.069363564, -0.18494691, 0.022711221, -0.33429432]
