In [1]:
using Base.Iterators: partition
# Flux stuff
using Flux, CUDA
using Flux.Losses: logitcrossentropy
using Flux.Optimise
# Image stuff
using Images, ImageIO, ImageMagick
# Plots
using Plots
# Datasets
using MLDatasets
using Statistics
using Parameters
# Other stuff
using LinearAlgebra
using Random
using ProgressBars
import Distributions: Uniform

using Suppressor

In [2]:
@with_kw mutable struct HPARAMS
    batch_size::Int = 64
    epochs::Int = 50
    # feedback frequency
    verbose_freq::Int = 100
    # learning rate
    lr::Float64 = 0.0001
    # minimum allowed fraction value
    min_frac::Float64 = 0.01
end

HPARAMS

In [3]:
# load full training set
train_x, train_y = FashionMNIST.traindata()
# load full test set
test_x,  test_y  = FashionMNIST.testdata()

(Normed{UInt8,8}[0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; … ; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8]

Normed{UInt8,8}[0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; … ; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8]

Normed{UInt8,8}[0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; … ; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8]

...

Normed{UInt8,8}[0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; … ; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8]

Normed{UInt8,8}[0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; … ; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8]

Normed{UInt8,8}[0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; … ; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8; 0.0N0f8 0.0N0f8 … 0.0N0f8 0.0N0f8], [9, 2, 1, 1, 6, 1, 4, 6, 5, 

In [4]:
println(size(train_x), typeof(train_x))
println(size(train_y), typeof(train_y))
println(size(test_x))
println(size(test_y))

(28, 28, 60000)Base.ReinterpretArray{Normed{UInt8,8},3,UInt8,Array{UInt8,3}}
(60000,)Array{Int64,1}
(28, 28, 10000)
(10000,)


In [5]:
hparams = HPARAMS()

HPARAMS
  batch_size: Int64 64
  epochs: Int64 50
  verbose_freq: Int64 100
  lr: Float64 0.0001
  min_frac: Float64 0.01


In [4]:
# normalize the data to -1 to 1
image_tensor = reshape(@.(2f0 .* train_x .- 1f0), 28, 28, 1,:);
answer_tensor = reshape(train_y/9.0, 60000)
println(size(image_tensor))
# parition the above tensor to batches
data_x = [image_tensor[:, :, :, r] for r in partition(1:60000, hparams.batch_size)];
data_y = [answer_tensor[r] for r in partition(1:60000, hparams.batch_size)];
println(size(data_x[1]))
println(size(data_y))
println(data_y[1])

(28, 28, 1, 60000)


LoadError: UndefVarError: hparams not defined

In [5]:
function model()
    return Chain(
        Conv((2,2), 1 => 4; stride=1, pad=2),
        x -> leakyrelu.(x, 0.2f0),
        Dropout(0.2),
        Conv((2,2), 4=>16; stride=1, pad=2),
        x -> leakyrelu.(x, 0.2f0),
        Dropout(0.2),
        Conv((3,3), 16=>8; stride=1, pad=1),
        x -> leakyrelu.(x, 0.2f0),
        Dropout(0.2),
        Conv((3,3), 8=>4; stride=1, pad=1),
        x -> leakyrelu.(x,0.2f0),
        Dropout(0.2),
        Conv((2,2), 4=>1; stride=1, pad=1),
        x -> leakyrelu.(x, 0.2f0),
        Dropout(0.2),
        x -> reshape(x,100,:),
        Dense(100,hparams.batch_size),
        x -> leakyrelu.(x,0.2f0),
        x -> Statistics.mean(x,dims=2),
        x -> sigmoid.(x)
    )
end

model (generic function with 1 method)

In [6]:
a = model()
println(a(data_x[1]))
println(size(a(data_x[1])))
println(logitcrossentropy(a(data_x[1]),data_y[1]))

LoadError: UndefVarError: hparams not defined

In [9]:
function discrete_observation(from_nn,target,hparams)
    max_target = maximum(target)
    output = round.(from_nn .* max_target) .- max_target*0.5
    ratio_positive = length([i for i in output if i>0]) / length(output)
    ratio_negative = length([i for i in output if i<0]) / length(output)
    
    ratio = abs(ratio_positive - ratio_negative)
    if abs(ratio) > hparams.min_frac
        return ratio
    else
        return hparams.min_frac
    end
end

discrete_observation (generic function with 1 method)

In [10]:
function criterion(output,target, factor)
    return factor * logitcrossentropy(output,target)
end

criterion (generic function with 1 method)

In [11]:
function train_model(x,y,model, optim, switch, hparams)
    ps = Flux.params(model)
    a = model(x)
    y = reshape(y,size(a))
    loss = 0.0

    if switch
        factor = discrete_observation(a,y,hparams)
    else
        factor = 1.0
    end
    
    gs = gradient(ps) do
        loss = criterion(model(x),y, factor)
    end
    update!(optim, ps, gs)
    
    return loss
end

train_model (generic function with 1 method)

In [12]:
function test(model, x, y)
    x = reshape(@.(2f0 .* x .- 1f0), 28, 28, 1,:)
    y = reshape(y ./ 9.0, 10000)
    data_in = [x[:, :, :, r] for r in partition(1:10000, hparams.batch_size)];
    targets = [y[r] for r in partition(1:10000, hparams.batch_size)];
    counter = 0
    output = 0.0
    for x in data_in
        counter += 1
        if counter == length(targets)-1
            break
        end
        a = model(x)
        ans = reshape(targets[counter],size(a))
        output += Flux.mean(criterion(a,ans,1.0))
    end
    return 100.0 * output / counter
end

test (generic function with 1 method)

In [13]:
function train(; kws...)
    hparams = HPARAMS(; kws...)
    # models
    without_observation = model() |> gpu
    with_observation = model() |> gpu
    # optimisers
    no_o_optim = ADAM(hparams.lr)
    o_optim = ADAM(hparams.lr)
   for epoch in 1:hparams.epochs
        counter = 0
        for x in ProgressBar(data_x)
            counter = counter + 1 # this will count the entries of data_y
            x = cu(x)
            y = cu(data_y[counter])
            if counter == length(y)-1
                break
            end
            # now pass the data through the networks
            without_loss = train_model(x,y,without_observation,no_o_optim,false,hparams)
            with_loss = train_model(x,y,with_observation, o_optim,true,hparams)
            
            train_step = counter + epoch * hparams.epochs
            
            # test
            if train_step % hparams.verbose_freq == 0
                without = test(without_observation, cu(test_x),cu(test_y))
                with = test(with_observation, cu(test_x),cu(test_y))
                println()
                println("At $(train_step) step.")
                println("Without observations, the loss is : ", without_loss)
                println("With observations, the loss is : ", with_loss)
                println("Without observations, we have an accuracy of $(without)%")
                println("With observations, we have an accuracy of $(with)%")
            end # end of test
        end # end of inner for-loop
    end # end of epoch for-loop
end

train (generic function with 1 method)

In [14]:
@suppress_err begin
    train()
end

5.0%┣██▏                                        ┫ 47/938 [01:18<25:01, 0.6 it/s]
At 100 step.
Without observations, the loss is : 142.342041015625
With observations, the loss is : 142.33233642578125
Without observations, we have an accuracy of 13218.941604785423%
With observations, we have an accuracy of 13218.956807501974%
5.0%┣██                                        ┫ 47/938 [00:01<00:14, 64.6 it/s]
At 200 step.
Without observations, the loss is : 142.33795166015625
With observations, the loss is : 84.51300477981567
Without observations, we have an accuracy of 13218.943495967784%
With observations, we have an accuracy of 13218.965083343692%
5.1%┣██▏                                       ┫ 48/938 [00:01<00:14, 65.1 it/s]
At 300 step.
Without observations, the loss is : 142.34579467773438
With observations, the loss is : 31.13790225982666
Without observations, we have an accuracy of 13218.93797403048%
With observations, we have an accuracy of 13218.9958334988%
5.2%┣██▏               