## imports, data loading, & data prep.

In [None]:
import Pkg
include("EVGONN.jl")
include("NN.jl")
using .EVGONN
using .NN
using StatsBase
using MLDatasets

In [None]:
# Problem dimensions used throughout the notebook:
#   n            - number of training samples
#   n_var        - number of input features per flattened sample
#   n_categories - number of output classes
n = 60_000;
n_var = 28 * 28;
n_categories = 10;

In [None]:
# Load the Fashion-MNIST training and test splits (features and labels) through MLDatasets.
# NOTE(review): `traindata()` / `testdata()` are the older MLDatasets accessors; newer package
# releases expose a different API — confirm the package version this notebook is pinned to.
train_x, train_y = FashionMNIST.traindata()
test_x, test_y  = FashionMNIST.testdata();

In [None]:
# Convert the raw feature arrays to Float64 and flatten every sample into one column of
# length n_var, then put the train and test columns side by side in a single matrix.
X_train = reshape(Float64.(train_x), n_var, n);
X_test = reshape(Float64.(test_x), n_var, 10000);
X = [X_train X_test];

In [None]:
# Fit a z-score standardization on the combined train+test matrix, then rebind X to the
# standardized result.
# NOTE(review): per the StatsBase docs, `dims=1` fits the parameters column-wise; since X is
# laid out as (n_var, samples) here, that would standardize each sample rather than each
# feature — confirm this is the intended normalization.
dt = fit(ZScoreTransform, Array(X), dims=1)
X = StatsBase.transform(dt, X);

In [None]:
# Split the standardized matrix back into train / test columns and insert a singleton middle
# dimension, so that sample i is addressed as X_train[:, 1, i] / X_test[:, 1, i].
# The trailing `:` lets reshape infer the sample count from the selected columns.
X_train = reshape(X[:, 1:60000], n_var, 1, :);
X_test = reshape(X[:, 60001:70000], n_var, 1, :);

In [None]:
# Shift every label up by one so labels can be used directly as 1-based indices.
# NOTE(review): train_y is rebound to its own shifted copy, so re-running this cell shifts the
# training labels a second time.
train_y = map(label -> label + 1, train_y);
y_test = map(label -> label + 1, test_y);

In [None]:
# One-hot encode the training labels: row i of y_train gets a single 0x01 in column
# train_y[i]; every other entry stays zero.
y_train = zeros(UInt8, n, n_categories)
foreach(1:60000) do i
    y_train[i, train_y[i]] = 0x01
end

## compare vanilla and EVGO gradient descent.

In [None]:
# Build the two networks being compared: one from the NN module and one from the EVGONN module.
# Both receive n_var, the tuple (40,20,20) and n_categories — presumably input width, hidden
# layer sizes and output width; verify against the module constructors.
# NOTE(review): the extra positional `3` and the β1/β2 keywords are specific to EVGONN and
# their meaning is not visible from this file. The two networks also start from different η.
nn_van = NN.NeuralNetwork(n_var, (40,20,20), n_categories, η=0.02);
nn_evgo = EVGONN.NeuralNetwork(n_var, (40,20,20), n_categories, 3, η=0.05, β1=0.02, β2=0.0000001);

In [None]:
"""
    EV_check(nn)

Return the fraction of the 10000 test samples for which the EVGONN network `nn` predicts the
label stored in `y_test`.

Each sample `X_test[:, 1, i]` is transposed, passed through `EVGONN.prepare` and
`EVGONN.predict`, and the second index component of the `argmax` of the `"result"` entry is
compared with `y_test[i]`.
"""
function EV_check(nn)
    n_test = 10000
    hits = 0
    for i in 1:n_test
        sample = EVGONN.prepare(X_test[:, 1, i]')
        scores = EVGONN.predict(sample, nn)["result"]
        predicted = argmax(scores)[2]
        # Adding a Bool to an Int counts the matches without a separate miss tally.
        hits += predicted == y_test[i]
    end
    return hits / n_test
end

In [None]:
"""
    EV_train(nn, iters=10000)

Train the EVGONN network `nn` with one `EVGONN.train` call per training sample, sweeping the
60000 training samples for at most `iters` passes.

Every 5th pass the sum of the per-sample costs is printed and recorded. Training stops early
when that sum differs from the previously recorded one by less than 10. Every 10th pass
`nn.learning_rate` is multiplied by 0.9 and `nn.β1` by 0.99.

Returns the vector `[final_cost, test_accuracy, passes_run, elapsed_seconds, losses]`, where
`test_accuracy` comes from `EV_check(nn)` and `losses` holds the recorded cost sums (slots
that were never written stay zero).
"""
function EV_train(nn, iters=10000)
    start = time()
    costs = zeros(60000)
    old_costs = sum(costs)
    # One slot is written per 5-pass checkpoint. The minimum length of 2000 keeps the returned
    # shape unchanged for iters <= 10000, while larger `iters` no longer index out of bounds.
    losses = zeros(max(2000, iters ÷ 5))
    j = 1
    for it in 1:iters
        for i in 1:60000
            sample = EVGONN.prepare(X_train[:, 1, i]')
            target = EVGONN.prepare(y_train[i, :]')
            costs[i] = EVGONN.train(sample, target, nn)["cost"]
        end
        if it % 5 == 0
            new_costs = sum(costs)
            losses[j] = new_costs
            println("error = ", new_costs)
            j = j + 1
            # Converged: the summed cost barely moved since the previous checkpoint.
            if abs(new_costs - old_costs) < 10
                total_time = (time() - start)
                test_acc = EV_check(nn)
                return [new_costs, test_acc, it, total_time, losses]
            end
            old_costs = new_costs
        end
        # Decay schedule; applied after the convergence check of the same pass.
        if it % 10 == 0
            nn.learning_rate = nn.learning_rate * 0.9
            nn.β1 = nn.β1 * 0.99
        end
    end
    # Iteration budget exhausted without meeting the convergence criterion.
    new_costs = sum(costs)
    total_time = (time() - start)
    test_acc = EV_check(nn)
    return [new_costs, test_acc, iters, total_time, losses]
end

In [None]:
# Train the EVGO network with the default iteration cap; the result vector holds
# [summed cost, test accuracy, passes run, elapsed seconds, recorded losses].
result_EV = EV_train(nn_evgo)

In [None]:
"""
    VAN_check(nn)

Return the fraction of the 10000 test samples for which the NN network `nn` predicts the
label stored in `y_test`.

Each sample `X_test[:, 1, i]` is transposed, passed through `NN.prepare` and `NN.predict`,
and the second index component of the `argmax` of the `"result"` entry is compared with
`y_test[i]`.
"""
function VAN_check(nn)
    n_test = 10000
    hits = 0
    for i in 1:n_test
        sample = NN.prepare(X_test[:, 1, i]')
        scores = NN.predict(sample, nn)["result"]
        predicted = argmax(scores)[2]
        # Adding a Bool to an Int counts the matches without a separate miss tally.
        hits += predicted == y_test[i]
    end
    return hits / n_test
end

In [None]:
"""
    VAN_train(nn, iters=10000)

Train the NN network `nn` with one `NN.train` call per training sample, sweeping the 60000
training samples for at most `iters` passes.

Every 5th pass the sum of the per-sample costs is printed and recorded. Training stops early
when that sum differs from the previously recorded one by less than 10. Every 10th pass
`nn.learning_rate` is multiplied by 0.9.

Returns the vector `[final_cost, test_accuracy, passes_run, elapsed_seconds, losses]`, where
`test_accuracy` comes from `VAN_check(nn)` on both exit paths and `losses` holds the recorded
cost sums (slots that were never written stay zero).
"""
function VAN_train(nn, iters=10000)
    start = time()
    costs = zeros(60000)
    old_costs = sum(costs)
    # One slot is written per 5-pass checkpoint. The minimum length of 2000 keeps the returned
    # shape unchanged for iters <= 10000, while larger `iters` no longer index out of bounds.
    losses = zeros(max(2000, iters ÷ 5))
    j = 1
    for it in 1:iters
        for i in 1:60000
            sample = NN.prepare(X_train[:, 1, i]')
            target = NN.prepare(y_train[i, :]')
            costs[i] = NN.train(sample, target, nn)["cost"]
        end
        if it % 5 == 0
            new_costs = sum(costs)
            losses[j] = new_costs
            println("error = ", new_costs)
            j = j + 1
            # Converged: the summed cost barely moved since the previous checkpoint.
            if abs(new_costs - old_costs) < 10
                total_time = (time() - start)
                test_acc = VAN_check(nn)
                return [new_costs, test_acc, it, total_time, losses]
            end
            old_costs = new_costs
        end
        # Learning-rate decay; applied after the convergence check of the same pass.
        if it % 10 == 0
            nn.learning_rate = nn.learning_rate * 0.9
        end
    end
    # Iteration budget exhausted without meeting the convergence criterion.
    new_costs = sum(costs)
    total_time = (time() - start)
    # Score with the NN-module checker, matching the early-exit path above; the EVGONN
    # checker would run this network through the wrong module's predict.
    test_acc = VAN_check(nn)
    return [new_costs, test_acc, iters, total_time, losses]
end

In [None]:
# Train the vanilla network with the default iteration cap; the result vector holds
# [summed cost, test accuracy, passes run, elapsed seconds, recorded losses].
result_VAN = VAN_train(nn_van)