# Annealing

In [None]:
%install '.package(path: "$cwd/FastaiNotebook_04_callbacks")' FastaiNotebook_04_callbacks

## Load data

In [None]:
import FastaiNotebook_04_callbacks

In [None]:
// export
import Path
import TensorFlow

In [None]:
let data = mnistDataBunch(flat: true)

In [None]:
// Problem dimensions used to build the model below.
// NOTE(review): presumably n is the number of training samples and m the
// flattened input size (the data is loaded with `flat: true`) — confirm.
let (n,m) = (60000,784)
// Passed as `nOut` to BasicModel in modelInit().
let c = 10
// Passed as `nHid` to BasicModel in modelInit().
let nHid = 50

In [None]:
let opt = SGD<BasicModel, Float>(learningRate: 1e-2)

In [None]:
/// Builds a new `BasicModel` from the notebook-level sizes `m` (inputs),
/// `nHid` (hidden units) and `c` (outputs).
func modelInit() -> BasicModel {
    let freshModel = BasicModel(nIn: m, nHid: nHid, nOut: c)
    return freshModel
}

In [None]:
/// Runs `model` on `inputs` and differentiates the softmax cross-entropy loss
/// against `labels`.
///
/// - Parameters:
///   - model: The model to evaluate and differentiate.
///   - context: The context forwarded to `applied(to:in:)`.
///   - inputs: The batch of inputs fed to the model.
///   - labels: The target labels for the batch.
/// - Returns: The loss, the model's raw output for the batch, and the
///   gradient of the loss with respect to the model.
func lossOutputWithGrad(
    model: BasicModel,
    in context: Context,
    inputs: Tensor<Float>,
    labels: Tensor<Int32>
) -> (Tensor<Float>, BasicModel.Output, BasicModel.CotangentVector) {
    // `valueWithGradient` only hands back the loss and the gradient, so the
    // forward-pass output is captured from inside the closure.
    var capturedOutput: BasicModel.Output? = nil
    let lossAndGradient = model.valueWithGradient { net -> Tensor<Float> in
        let logits = net.applied(to: inputs, in: context)
        capturedOutput = logits
        return softmaxCrossEntropy(logits: logits, labels: labels)
    }
    // The closure has run by now, so the capture is expected to be populated.
    return (lossAndGradient.0, capturedOutput!, lossAndGradient.1)
}

In [None]:
let learner = Learner(data: data, lossOutputWithGradient: lossOutputWithGrad, optimizer: opt, initializingWith: modelInit)

In [None]:
learner.delegates = [Learner.TrainEvalDelegate(), Learner.AvgMetric(metrics: [accuracy])]

In [None]:
learner.fit(2)

## Annealing

We define two new callbacks: the Recorder, which keeps track of the loss and our scheduled learning rate, and a ParamScheduler, which can schedule any hyperparameter as long as it's registered in the state_dict of the optimizer.

In [None]:
extension Learner {
    /// Creates a new, empty `Recorder` delegate.
    func makeRecorder() -> Recorder {
        let recorder = Recorder()
        return recorder
    }

    /// Delegate that logs the loss and the optimizer's learning rate after
    /// each batch processed while the learner is in training mode.
    public class Recorder: Delegate {
        /// One entry per recorded batch, in the order the batches finished.
        public var losses: [Loss] = []
        /// Learning rate read from the optimizer for each recorded batch.
        public var lrs: [O.Scalar] = []

        public override func batchDidFinish(learner: Learner) throws {
            // Nothing is recorded unless the learner reports `inTrain`.
            guard learner.inTrain else { return }
            losses.append(learner.currentLoss)
            lrs.append(learner.optimizer.learningRate)
        }
    }
}

In [None]:
let learner = Learner(data: data, lossOutputWithGradient: lossOutputWithGrad, optimizer: opt, initializingWith: modelInit)

In [None]:
let recorder = learner.makeRecorder()

In [None]:
learner.delegates = [Learner.TrainEvalDelegate(), Learner.AvgMetric(metrics: [accuracy]), recorder]

In [None]:
learner.fit(2)

In [None]:
recorder.losses.count

### Progress bar

In [None]:
import Glibc
import Foundation

In [None]:
/// Formats a duration in seconds as `MM:SS`, or `HH:MM:SS` once it reaches
/// an hour.
///
/// Fractional seconds are truncated. Negative durations are shown as
/// `00:00`. Values that cannot be converted to an `Int` (NaN, infinities,
/// out-of-range magnitudes) are rendered as `--:--` instead of trapping.
///
/// - Parameter t: The duration in seconds.
/// - Returns: The formatted duration.
func formatTime(_ t: Float) -> String {
    // `Int(t)` traps on NaN/infinite/out-of-range input; `Int(exactly:)`
    // returns nil for those instead.
    guard t.isFinite, let whole = Int(exactly: max(t, 0).rounded(.towardZero)) else {
        return "--:--"
    }
    let hours = whole / 3600
    let minutes = (whole / 60) % 60
    let seconds = whole % 60
    if hours != 0 {
        return String(format: "%02d:%02d:%02d", hours, minutes, seconds)
    }
    return String(format: "%02d:%02d", minutes, seconds)
}

In [None]:
formatTime(78.23)

In [None]:
/// A text progress bar that redraws itself in place on standard output.
///
/// Call `update(0)` to start the clock, then `update(_:)` with the number of
/// completed items. Redraws are throttled so that the bar is refreshed
/// roughly every `showEvery` seconds, based on the average time per item
/// measured so far.
public struct ProgressBar{
    /// Number of items that make up 100%.
    let total: Int
    /// Width of the bar in characters.
    let length: Int = 50
    /// Target interval, in seconds, between two redraws.
    let showEvery: Float = 0.02
    /// Character used for the completed part of the bar.
    let fillChar: Character = "X"
    /// Optional text appended inside the closing bracket.
    public var comment: String = ""
    /// Value shown at the last redraw.
    private var lastVal: Int = 0
    /// Number of items to wait for before the next redraw.
    private var waitFor: Int = 0
    /// Timestamps in nanoseconds of the start and of the last redraw.
    private var startTime: UInt64 = 0
    private var lastShow: UInt64 = 0
    /// Estimated duration of the whole run, in seconds.
    private var estimatedTotal: Float = 0.0
    /// Last rendered line; its length is used by `remove()`.
    private var bar: String = ""
    
    public init(_ c: Int) { total = c }
    
    /// Reports that `val` items are done and redraws the bar when due.
    ///
    /// `val == 0` (re)starts the timer. Other values trigger a redraw only
    /// when at least `waitFor` items were completed since the last redraw,
    /// or when `val` reaches `total`.
    public mutating func update(_ val: Int){
        if val == 0 {
            startTime = DispatchTime.now().uptimeNanoseconds
            lastShow = startTime
            waitFor = 1
            update_bar(0)
        } else if val >= lastVal + waitFor || val == total {
            lastShow = DispatchTime.now().uptimeNanoseconds
            let averageTime = Float(lastShow - startTime) / (1e9 * Float(val))
            // The small epsilon keeps the division well defined.
            waitFor = max(Int(averageTime / (showEvery + 1e-8)), 1)
            estimatedTotal = Float(total) * averageTime
            update_bar(val)
        }
    }
    
    /// Renders the bar for `val` completed items and prints it, ending with
    /// a carriage return so the next print overwrites it.
    public mutating func update_bar(_ val: Int){
        lastVal = val
        // Guard against `total <= 0` (division by zero) and clamp to the bar
        // width so that `val > total` or `val < 0` cannot yield a negative
        // repeat count, which would trap.
        let filled: Int
        let fraction: Float
        if total > 0 {
            filled = min(max((val * length) / total, 0), length)
            fraction = min(max(Float(val) / Float(total), 0), 1)
        } else {
            // Nothing to do counts as complete.
            filled = length
            fraction = 1
        }
        bar = String(repeating: fillChar, count: filled)
        bar += String(repeating: "-", count: length - filled)
        let pct = String(format: "%.2f", 100.0 * fraction)
        let elapsedTime = Float(lastShow - startTime) / 1e9
        bar += " \(pct)% [\(val)/\(total) \(formatTime(elapsedTime))<\(formatTime(estimatedTotal))"
        bar += comment.isEmpty ? "]" : " \(comment)]"
        print(bar, terminator:"\r")
        fflush(stdout)
    }
    
    /// Blanks out the last rendered bar by overwriting it with spaces.
    public func remove(){
        print(String(repeating: " ", count: bar.count), terminator:"\r")
        fflush(stdout)
    }
}

In [None]:
// Visual check of ProgressBar: step through 0...100 with a 50 ms pause
// between updates, then erase the bar.
var tst = ProgressBar(100)
(0...100).forEach { tick in
    tst.update(tick)
    usleep(50000)
}
tst.remove()

In [None]:
extension Learner {
    /// Delegate that shows a `ProgressBar` over the batches of the training
    /// set and then over the batches of the validation set.
    public class ShowProgress: Delegate {
        /// The bar currently on screen, if any.
        var pbar: ProgressBar? = nil
        /// Number of batches finished since the current bar was created.
        var iter: Int = 0
        
        /// Starts a new bar sized to the number of elements in `data.train`.
        public override func epochWillStart(learner: Learner) throws{
            pbar = ProgressBar(learner.data.train.count(where: {_ in true}))
            iter = 0
            pbar?.update(iter)
        }
        
        /// Erases the current bar and starts one sized to `data.valid`.
        public override func validationWillStart(learner: Learner) throws{
            pbar?.remove()
            pbar = ProgressBar(learner.data.valid.count(where: {_ in true}))
            iter = 0
            pbar?.update(iter)
        }
        
        /// Erases the bar at the end of the epoch.
        public override func epochDidFinish(learner: Learner) throws{
            pbar?.remove()
        }
        
        /// Advances the bar by one batch.
        public override func batchDidFinish(learner: Learner) throws{
            iter += 1
            // Optional chaining instead of `pbar!`: a batch finishing before
            // any bar was created must not crash the training loop.
            pbar?.update(iter)
        }
    }
}

In [None]:
let learner = Learner(data: data, lossOutputWithGradient: lossOutputWithGrad, optimizer: opt, initializingWith: modelInit)

In [None]:
let recorder = learner.makeRecorder()
learner.delegates = [Learner.TrainEvalDelegate(), Learner.ShowProgress(), 
                     Learner.AvgMetric(metrics: [accuracy]), recorder]

In [None]:
learner.fit(2)

## Add Callbacks

The code below adds callbacks and defines a new training loop.

In [None]:
/// An optimizer whose learning rate can be written as well as read.
/// Remove me when we have a new linux build that includes the TF-425 fix.
protocol SettableOptimizer: Optimizer {
    /// The learning rate; declared `get set` so it can be reassigned.
    var learningRate: Scalar { get set }
}

/// Plain stochastic gradient descent with a learning rate that can be
/// changed between updates.
public class SettableSGD<Model: Layer>: SettableOptimizer
    where Model.AllDifferentiableVariables == Model.CotangentVector {
    /// The learning rate. Assigning a negative value is a precondition
    /// failure.
    public var learningRate: Float {
        willSet(newLearningRate) {
            precondition(newLearningRate >= 0, "Learning rate must be non-negative")
        }
    }

    /// Creates the optimizer.
    ///
    /// - Parameter learningRate: The initial, non-negative learning rate.
    public init(learningRate: Float = 0.01) {
        precondition(learningRate >= 0, "Learning rate must be non-negative")
        self.learningRate = learningRate
    }

    /// Moves every tensor in `model` one step against its gradient.
    ///
    /// - Parameters:
    ///   - model: The differentiable variables to update in place.
    ///   - direction: The gradient of the loss with respect to `model`.
    public func update(_ model: inout Model.AllDifferentiableVariables,
                       along direction: Model.CotangentVector) {
        for kp in model.recursivelyAllWritableKeyPaths(to: Tensor<Scalar>.self) {
            // Descent subtracts the scaled gradient; adding it would
            // increase the loss instead of reducing it.
            model[keyPath: kp] -= learningRate * direction[keyPath: kp]
        }
    }
}


In [None]:
let opt = SettableSGD<BasicModel>(learningRate: 1e-2)

In [None]:
/// A non-generalized learning rate scheduler
extension Learner where O: SettableOptimizer, O.Scalar == Float {

    /// Delegate that sets the optimizer's learning rate from a schedule
    /// evaluated at the fraction of training steps completed so far.
    public class ParamScheduler: Delegate {
        /// Maps training progress, a fraction in `0...1`, to a learning rate.
        public typealias ScheduleFunc = (Float) -> Float

        /// The learning rate schedule, queried with the training progress.
        public var scheduler: ScheduleFunc
        /// Number of training batches finished since training started.
        private var step = 0
        /// Number of elements in `data.train`, counted when training starts.
        private var totalSteps = 0
        
        init(scheduler: @escaping ScheduleFunc) {
            self.scheduler = scheduler
        }
        
        /// Resets the step counter and applies the schedule's starting value.
        override public func trainingWillStart(learner: Learner) {
            step = 0
            totalSteps = learner.data.train.count(where: {_ in true})
            learner.optimizer.learningRate = scheduler(0)
        }
        
        /// Advances the schedule after each training batch.
        override public func batchDidFinish(learner: Learner) {
            // Only training batches advance the schedule, and an empty
            // training set would otherwise produce 0/0 = NaN.
            guard learner.inTrain, totalSteps > 0 else { return }
            step += 1
            // NOTE(review): totalSteps covers a single pass over the training
            // data; progress is capped at 1 so the schedule is never queried
            // outside its range if training runs longer than that — confirm
            // whether multi-epoch schedules should span all epochs instead.
            let progress = min(Float(step) / Float(totalSteps), 1)
            learner.optimizer.learningRate = scheduler(progress)
        }
    }
}


In [None]:
/// Linearly interpolates between `start` and `end`.
///
/// - Parameters:
///   - start: The value returned for `pct == 0`.
///   - end: The value returned for `pct == 1`.
///   - pct: The interpolation position.
/// - Returns: `start` moved by `pct` times the distance to `end`.
func linearSchedule(start: Float, end: Float, pct: Float) -> Float {
    let span = end - start
    let offset = pct * span
    return start + offset
}

/// Binds `start` and `end` into `schedule`, leaving a function of the
/// position only.
///
/// - Parameters:
///   - start: First argument passed to `schedule` on every call.
///   - end: Second argument passed to `schedule` on every call.
///   - schedule: A function of `(start, end, pct)`.
/// - Returns: A closure that takes `pct` and returns
///   `schedule(start, end, pct)`.
func makeAnnealer(start: Float, end: Float, schedule: @escaping (Float, Float, Float) -> Float) -> (Float) -> Float {
    let annealed: (Float) -> Float = { schedule(start, end, $0) }
    return annealed
}

In [None]:
let annealer = makeAnnealer(start: 1, end: 2, schedule: linearSchedule)
annealer(0.3)

In [None]:
// COMPILER CRASH IN HERE!
// Builds a learner whose delegates include the learning rate scheduler.
let learner = Learner(data: data, lossOutputWithGradient: lossOutputWithGrad, optimizer: opt, initializingWith: modelInit)
let recorder = learner.makeRecorder()
// ParamScheduler's initializer takes a labelled `scheduler:` argument.
learner.delegates = [Learner.TrainEvalDelegate(), Learner.ShowProgress(), 
                     Learner.AvgMetric(metrics: [accuracy]), recorder,
                     Learner.ParamScheduler(scheduler: annealer)]

In [None]:
// TODO: implement the rest of the notebook!