In [6]:
using Flux, MLDatasets, ImageCore, StatsBase
using MLDatasets: MNIST
using Flux: train!, update!
using Flux: onehot, throttle, crossentropy
using StatsBase: sample

In [28]:
using Flux
using Flux: Data.DataLoader
using Flux: onehotbatch, onecold, crossentropy
using Flux: @epochs
using Statistics
using MLDatasets

# load full training set
train_x, train_y = MNIST.traindata();

train_x_vec = [vec(train_x[:, :, i]) for i = 1:60000];
train_y_hot = [onehot(train_y[i], 0:9) for i = 1:60000];

# load full test set
test_x,  test_y  = MNIST.testdata();

test_x_vec = [vec(test_x[:, :, i]) for i = 1:10000];
test_y_hot = [onehot(test_y[i], 0:9) for i = 1:10000];

traindata = [(train_x_vec[i], train_y_hot[i]) for i = 1:60000];
testdata = [(test_x_vec[i], test_y_hot[i]) for i = 1:10000];

In [29]:
## Train final linear layer with random standard neural network
## Train final linear layer with random convolutional neural network
## Hopefully the second one has a lower loss on average (If not need to rethink some things)
## Train a standard neural network parametrized by a linear function of each layers weights
## Evolve the linear parametrization using an evolutionary algorithm where the fitness is the average loss 
## after training of n (=10?) input parameters. Can the parameters of the linear reparameterization model
## be sparsified?

In [83]:
parameter_model = function(x, W = nothing, b = nothing)
    W*x + b;
end

max_parameters = 9
input = 784
hidden_out = 32

W_weight = randn(input*hidden_out, max_parameters)
b_weight = randn(input*hidden_out)
#W_bias = randn(hidden_out, max_parameters)
#b_bias = randn(hidden_out)

weight_model(x) = parameter_model(x, W_weight, b_weight)
#bias_model(x) = parameter_model(x, W_bias, b_bias)

x = randn(max_parameters)

W_init(out, in) = weight_model(x)
b_init(out) = zeros(out)

data_model = Chain(
      Dense(input, hidden_out, relu; initW=W_init, initb=b_init),
      Dense(hidden_out, 10),
      softmax
    )

num_hidden_layers = 1
ps_hidden = Flux.params(m[1:num_hidden_layers])

#ps_out = Flux.params(m[2:end])

LoadError: MethodError: no method matching Dense(::Vector{Float64}, ::Vector{Float64}, ::typeof(relu))
[0mClosest candidates are:
[0m  Dense([91m::M[39m, ::Any, ::F) where {M<:(AbstractMatrix{T} where T), F} at /Users/joshnunley/.julia/packages/Flux/ZnXxS/src/layers/basic.jl:127
[0m  Dense([91m::Integer[39m, [91m::Integer[39m, ::Any; initW, initb, init, bias) at /Users/joshnunley/.julia/packages/Flux/ZnXxS/src/layers/basic.jl:133
[0m  Dense([91m::M[39m, ::Any) where M<:(AbstractMatrix{T} where T) at /Users/joshnunley/.julia/packages/Flux/ZnXxS/src/layers/basic.jl:127

In [72]:
b_init(out) = zeros(out)

m = Chain(
      Dense(784, 784, relu; initb=b_init),
      Dense(784, 10),
      softmax
    )
ps_out = Flux.params(m[2:end])

loss(x, y) = Flux.Losses.crossentropy(m(x), y)

opt = Descent(0.1)

accuracy(x, y) = mean(onecold.(x) .== onecold.(y))
evalcb() = @show(loss(train, test_y))
throttled_cb = throttle(evalcb, 1)

(::Flux.var"#throttled#68"{Flux.var"#throttled#64#69"{Bool, Bool, typeof(evalcb), Int64}}) (generic function with 1 method)

In [73]:
trainsample = sample(traindata, 600, replace=false);

In [74]:
for (i, d) in enumerate(trainsample)
    gs = gradient(ps_out) do
      training_loss = loss(d...)
      # Code inserted here will be differentiated, unless you need that gradient information
      # it is better to do the work outside this block.
      return training_loss
    end
    # Insert whatever code you want here that needs training_loss, e.g. logging.
    # logging_callback(training_loss)
    # Insert what ever code you want here that needs gradient.
    # E.g. logging with TensorBoardLogger.jl as histogram so you can see if it is becoming huge.
    update!(opt, ps_out, gs)
end

In [70]:
accuracy(m.(train_x_vec), train_y_hot)

0.7251333333333333

In [75]:
accuracy(m.(test_x_vec), test_y_hot)

0.7641