In [1]:
using Flux
using Flux: @epochs, throttle
using ExprOptimization
using Base.Iterators: repeated
using BSON: @save
using BSON: @load
using Plots
using LinearAlgebra

# Function to plot loss

In [2]:
function plotLoss(lossVals)
    floatLossVals = [Tracker.data(lv) for lv in lossVals]
    gr(fmt=:png)
    plot()
    plot!(floatLossVals,
        xlabel = "time",
        ylabel = "loss",
        label = "mse loss",
        title = "loss evolution")
end

plotLoss (generic function with 1 method)

# Example of how to use Flux

In [11]:
function train_model(model, X_train, Y_train; verbose = false)
    loss(x, y) = Flux.mse(model(x), y)
    data = repeated((X_train, Y_train), 10)
    opt = Flux.Optimise.ADAM()
    lossVals = [loss(X_train,Y_train)]
    evalcb = () -> begin curLoss = loss(X_train,Y_train)
        push!(lossVals, curLoss)
        verbose && @show(curLoss) end
    evalcb2 = () -> @save "model-flux-trial.bson" model
    verbose && @show loss(X_train,Y_train)
    @epochs 10 Flux.train!(loss, params(model), data, opt, cb = [throttle(evalcb, 30), throttle(evalcb2, 60)])
    verbose && @show lossVals
    verbose && plotLoss(lossVals)
    return lossVals
end

train_model (generic function with 1 method)

In [88]:
input_size = 3
output_size = 1
X_train = [zeros(input_size,10) ones(input_size,10) 2*ones(input_size,10) 3*ones(input_size,10) zeros(input_size,10)]
Y_train = [zeros(output_size,10) ones(output_size,10) ones(output_size,10) ones(output_size,10) zeros(output_size,10)]
model = Chain(Dense(input_size,128, relu), Dense(128,output_size, relu)) 


Chain(Dense(3, 128, NNlib.relu), Dense(128, 1, NNlib.relu))

In [89]:
train_model(model, X_train, Y_train)

┌ Info: Epoch 1
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 2
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 3
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 4
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 5
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 6
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 7
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 8
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 9
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 10
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105


11-element Array{Tracker.TrackedReal{Float64},1}:
 0.19310455865444515 
 0.16186328938066985 
 0.08679083034311014 
 0.0717480069993897  
 0.06746267261672259 
 0.05947459958400506 
 0.05301820226744454 
 0.04711346114396875 
 0.041332898143016904
 0.035780410357353624
 0.03054061506612096 

In [90]:
train_model(model, X_train, Y_train, verbose = true)

loss(X_train, Y_train) = 0.026116474461597594 (tracked)
curLoss = 0.025434441681776932 (tracked)
curLoss = 0.017149314729397958 (tracked)
curLoss = 0.010145911998658264 (tracked)
curLoss = 0.0049242869205732955 (tracked)
curLoss = 0.0017637792683359522 (tracked)
curLoss = 0.0003621262522504054 (tracked)
curLoss = 1.440591663737223e-5 (tracked)
curLoss = 1.163286822887244e-5 (tracked)
curLoss = 4.628345533319589e-6 (tracked)
curLoss = 5.323641748589125e-6 (tracked)
lossVals = Tracker.TrackedReal{Float64}[0.0261165, 0.0254344, 0.0171493, 0.0101459, 0.00492429, 0.00176378, 0.000362126, 1.44059e-5, 1.16329e-5, 4.62835e-6, 5.32364e-6]


┌ Info: Epoch 1
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 2
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 3
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 4
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 5
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 6
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 7
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 8
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 9
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 10
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105


11-element Array{Tracker.TrackedReal{Float64},1}:
 0.026116474461597594 
 0.025434441681776932 
 0.017149314729397958 
 0.010145911998658264 
 0.0049242869205732955
 0.0017637792683359522
 0.0003621262522504054
 1.440591663737223e-5 
 1.163286822887244e-5 
 4.628345533319589e-6 
 5.323641748589125e-6 

In [91]:
@show model(ones(input_size,1))
@show model(3*ones(input_size,1))
@show model(5*ones(input_size,1))
@show model(zeros(input_size,1))

model(ones(input_size, 1)) = Float32[1.00201] (tracked)
model(3 * ones(input_size, 1)) = Float32[0.996905] (tracked)
model(5 * ones(input_size, 1)) = Float32[0.986082] (tracked)
model(zeros(input_size, 1)) = Float32[0.0] (tracked)


Tracked 1×1 Array{Float32,2}:
 0.0f0

# Example of ExprOptimization

In [177]:
const grammar = @grammar begin
    Real = x
    Real = Real * Real
    Real = Real + Real
    Real = Real - Real
    Real = |(1:5)
end



1: Real = x
2: Real = Real * Real
3: Real = Real + Real
4: Real = Real - Real
5: Real = 1
6: Real = 2
7: Real = 3
8: Real = 4
9: Real = 5


In [178]:
const S = SymbolTable(grammar)




Dict{Symbol,Any} with 5 entries:
  :+    => +
  :Real => Real
  :-    => -
  :*    => *
  :x    => [1, 2, 3, 2]

In [179]:
ground_truth(x) = x*x + 2x + 1
function loss(tree::RuleNode, grammar::Grammar)
    ex = get_executable(tree, grammar)
    los = 0.0
    for x = -5.0:1.0:5.0
        S[:x] = x
        los += abs2(Core.eval(S,ex) - ground_truth(x))
    end
    los
end

loss (generic function with 2 methods)

## MonteCarlo Arguments:
- num_samples::Int: number of samples
- max_depth::Int: maximum depth of derivation tree

In [180]:
using Random
Random.seed!(10)
p = MonteCarlo(20000, 6)
results_mc = optimize(p, grammar, :Real, loss)
(results_mc.expr, results_mc.loss)

(:((x + 3) * x), 121.0)

In [182]:
display(results_mc.tree, grammar)


TreeView.LabelledTree({5, 4} directed simple Int64 graph, Any[:*, :+, :x, 3, :x])

## GeneticProgram Arguments
- pop_size::Int: population size
- iterations::Int: number of iterations
- max_depth::Int: maximum depth of derivation tree
- p_reproduction::Float64: probability of reproduction operator
- p_crossover::Float64: probability of crossover operator
- p_mutation::Float64: probability of mutation operator
- init_method::InitializationMethod: initialization method
- select_method::SelectionMethod: selection method

In [184]:
Random.seed!(1)
p = GeneticProgram(1000,20,6,0.3,0.3,0.4)
results_gp = optimize(p, grammar, :Real, loss)
(results_gp.expr, results_gp.loss)

(:((x * 2 + (x * x - 3)) + 4), 0.0)

In [186]:
@show results_gp

results_gp = ExprOptResult(3{3{2{1,6}4{2{1,1}7}}8}, 0.0, :((x * 2 + (x * x - 3)) + 4), nothing)


ExprOptResult(3{3{2{1,6}4{2{1,1}7}}8}, 0.0, :((x * 2 + (x * x - 3)) + 4), nothing)

# True grammar

In [5]:
input_size = 1
output_size = 1
X_train = [zeros(input_size,10) ones(input_size,10) 2*ones(input_size,10) 3*ones(input_size,10) zeros(input_size,10)]
Y_train = [zeros(output_size,10) ones(output_size,10) ones(output_size,10) ones(output_size,10) zeros(output_size,10)]

X_test = [zeros(input_size,2) 2*ones(input_size,2) 3*ones(input_size,2)]
Y_test = [zeros(input_size,2) ones(input_size,2) ones(input_size,2)]


1×6 Array{Float64,2}:
 0.0  0.0  1.0  1.0  1.0  1.0

In [12]:
const grammar = @grammar begin
model = Chain(in_layer, layer, layer, out_layer) | Chain(in_layer, layer, layer, layer, out_layer)
in_layer = Dense(input_size, 256, activation)
out_layer = Dense(256, output_size, activation)
layer = Dense(256,256, activation)
activation = relu
end



1: model = Chain(in_layer, layer, layer, out_layer)
2: model = Chain(in_layer, layer, layer, layer, out_layer)
3: in_layer = Dense(input_size, 256, activation)
4: out_layer = Dense(256, output_size, activation)
5: layer = Dense(256, 256, activation)
6: activation = relu


In [41]:
const S = SymbolTable(grammar)




Dict{Symbol,Any} with 5 entries:
  :output_size => 1
  :relu        => relu
  :Chain       => Chain
  :Dense       => Dense
  :input_size  => 1

In [43]:
dmap = mindepth_map(grammar)
best_tree, best_loss = RuleNode(0), Inf

typ = :model

tree = rand(RuleNode, grammar, typ, dmap, p.max_depth)
ex = get_executable(tree, grammar)
model = Core.eval(S,ex)


Chain(Dense(1, 256, NNlib.relu), Dense(256, 256, NNlib.relu), Dense(256, 256, NNlib.relu), Dense(256, 1, NNlib.relu))

In [51]:
function loss(tree::RuleNode, grammar::Grammar)
    ex = get_executable(tree, grammar)
    model = Core.eval(S,ex)
    train_model(model, X_train, Y_train, verbose = true)

    Y_NN = model(X_test)
    println("input $X_test")
    Y_NN = [Tracker.data(ynn) for ynn in Y_NN]
    println("output $Y_NN")
    println("expected output $Y_test")
    los = norm(Y_NN - Y_test)

    println("returning $los")
    return los
end

loss (generic function with 1 method)

In [52]:
using Random
Random.seed!(10)
p = MonteCarlo(1, 2)


results_mc = optimize(p, grammar, :model, loss, verbose = true)


samples: 1 of 1
loss(X_train, Y_train) = 0.5998604598770965 (tracked)


┌ Info: Epoch 1
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105


curLoss = 0.42463012541072703 (tracked)


┌ Info: Epoch 2
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105


curLoss = 0.0712523831675477 (tracked)
curLoss = 0.06842507460217533 (tracked)


┌ Info: Epoch 3
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 4
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105


curLoss = 0.04872484769613621 (tracked)
curLoss = 0.03647552918800443 (tracked)


┌ Info: Epoch 5
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 6
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105


curLoss = 0.02601260760985511 (tracked)
curLoss = 0.013789237774933127 (tracked)


┌ Info: Epoch 7
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105


curLoss = 0.0024440880415319555 (tracked)


┌ Info: Epoch 8
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105
┌ Info: Epoch 9
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105


curLoss = 0.0004367866475405435 (tracked)
curLoss = 0.0001345190339783642 (tracked)


┌ Info: Epoch 10
└ @ Main /home/somrita/.julia/packages/Flux/qXNjB/src/optimise/train.jl:105


lossVals = Tracker.TrackedReal{Float64}[0.59986, 0.42463, 0.0712524, 0.0684251, 0.0487248, 0.0364755, 0.0260126, 0.0137892, 0.00244409, 0.000436787, 0.000134519]
input [0.0 0.0 2.0 2.0 3.0 3.0]
output Float32[0.0 0.0 1.01624 1.01624 0.992336 0.992336]
expected output [0.0 0.0 1.0 1.0 1.0 1.0]
returning 0.025393422498184843


ExprOptResult(2{3{6}5{6}5{6}5{6}4{6}}, 0.025393422498184843, :(Chain(Dense(input_size, 256, relu), Dense(256, 256, relu), Dense(256, 256, relu), Dense(256, 256, relu), Dense(256, output_size, relu))), nothing)

In [53]:
(results_mc.expr, results_mc.loss)

(:(Chain(Dense(input_size, 256, relu), Dense(256, 256, relu), Dense(256, 256, relu), Dense(256, 256, relu), Dense(256, output_size, relu))), 0.025393422498184843)

In [16]:
size(X_test)

(1, 6)

In [34]:
ark = 3
println("returning $ark")

returning 3


# Scrap

In [None]:
# dmap = mindepth_map(grammar)
# best_tree, best_loss = RuleNode(0), Inf

# typ = :model

# tree = rand(RuleNode, grammar, typ, dmap, p.max_depth)
# ex = get_executable(tree, grammar)
#     los = 0.0
# x = 1.0
# S[:x] = x
# model = Core.eval(S,ex)

# X_train = [zeros(input_size,10) ones(input_size,10) 2*ones(input_size,10) 3*ones(input_size,10) zeros(input_size,10)]
# Y_train = [zeros(output_size,10) ones(output_size,10) ones(output_size,10) ones(output_size,10) zeros(output_size,10)]

# train_model(model, X_train, Y_train, verbose = false)

# los = loss(tree, grammar)

In [30]:
@grammar begin
model = Chain(layer, layer, layer, layer, relu) | Chain(layer, layer, layer, layer, layer, relu)
layer = Dense(n, activation)
n = 128|256|512|1024
activation = relu
end

1: model = Chain(layer, layer, layer, layer, relu)
2: model = Chain(layer, layer, layer, layer, layer, relu)
3: layer = Dense(n, activation)
4: n = 128
5: n = 256
6: n = 512
7: n = 1024
8: activation = relu


In [1]:
@grammar begin
model = Chain(layer, layer, relu) | Chain(layer, layer, layer, relu)
layer = Dense(n, activation) | OtherLayerType(...)
n = |(50:50:500)
activation = relu | tanh
end

LoadError: syntax: invalid identifier name "..."

In [None]:
model2 = Chain(
  Dense(10, 5, σ),
  Dense(5, 2),
  softmax)

In [19]:
imgs = MNIST.images()
# Stack images into one large batch
X = hcat(float.(reshape.(imgs, :))...) |> gpu

UndefVarError: UndefVarError: MNIST not defined

In [None]:
# Classify MNIST digits with a simple multi-layer-perceptron

imgs = MNIST.images()
# Stack images into one large batch
X = hcat(float.(reshape.(imgs, :))...) |> gpu

labels = MNIST.labels()
# One-hot-encode the labels
Y = onehotbatch(labels, 0:9) |> gpu

m = Chain(
  Dense(28^2, 32, relu),
  Dense(32, 10),
  softmax) |> gpu

loss(x, y) = crossentropy(m(x), y)

accuracy(x, y) = mean(onecold(m(x)) .== onecold(y))

dataset = repeated((X, Y), 200)
evalcb = () -> @show(loss(X, Y))
opt = ADAM()

Flux.train!(loss, params(m), dataset, opt, cb = throttle(evalcb, 10))

accuracy(X, Y)

# Test set accuracy
tX = hcat(float.(reshape.(MNIST.images(:test), :))...) |> gpu
tY = onehotbatch(MNIST.labels(:test), 0:9) |> gpu

accuracy(tX, tY)
