# Dependencies

In [1]:
using CSV, DataFrames, Random

In [2]:
using Flux

In [3]:
using CUDA

In [4]:
using Plots

In [5]:
# Select the plotting backend and the colour theme used by the figures below.
plotlyjs()
theme(:wong);

# Data loading

In [6]:
"""
    splitdf(df, pct; rng=Random.default_rng())

Randomly split the rows of `df` into two views.

A random permutation of the row indices is drawn and turned into a boolean
mask; the first returned view holds `floor(size(df, 1) * pct)` rows and the
second view holds the remaining rows. Rows inside each view keep their
original relative order because they are selected through the mask.

# Arguments
- `df`: any two-dimensional, row-indexable table (for example a `DataFrame`
  or a `Matrix`).
- `pct`: fraction of rows, in `[0, 1]`, assigned to the first view.

# Keywords
- `rng`: random number generator used for the shuffle; pass a seeded
  generator to obtain a reproducible split.

# Throws
- `ArgumentError` if `pct` is outside `[0, 1]`.
"""
function splitdf(df, pct; rng=Random.default_rng())
    # `@assert` may be compiled out, so validate the argument explicitly.
    0 <= pct <= 1 || throw(ArgumentError("pct must be within [0, 1], got $pct"))
    n = size(df, 1)
    ids = collect(axes(df, 1))
    shuffle!(rng, ids)
    # Position i is selected when its shuffled rank falls in the first n * pct.
    sel = ids .<= n * pct
    return view(df, sel, :), view(df, .!sel, :)
end;

In [7]:
# Read the training CSV, carve it into training/validation views (66.6 % of
# the rows go to training), then read the separate test CSV.
df = DataFrame(CSV.File("fashion-mnist_train.csv"));
df_train, df_valid = splitdf(df, 0.666)
df_test = DataFrame(CSV.File("fashion-mnist_test.csv"));

In [8]:
# Lookup table from the numeric class id (0-9) to its display name.
labels = Dict(
    zip(
        0:9,
        [
            "T-shirt",
            "Trouser",
            "Pullover",
            "Dress",
            "Coat",
            "Sandal",
            "Shirt",
            "Sneaker",
            "Bag",
            "Ankle boot",
        ],
    ),
);

In [9]:
"""
    load_y(df)

Return the `label` column of `df` one-hot encoded against the classes `0:9`.
"""
function load_y(df)
    return Flux.onehotbatch(df[:, :label], 0:9)
end

# Targets for each split, moved through `gpu`. Only the training targets are
# label-smoothed (factor 0.2); validation and test targets stay one-hot.
y_train = gpu(Flux.label_smoothing(load_y(df_train), 0.2f0))
y_valid = gpu(load_y(df_valid));
y_test = gpu(load_y(df_test));

In [10]:
"""
    load_x(df)

Turn every non-`label` column of `df` into a `28×28×1×N` `Float32` image
batch scaled by `1/255`, with the first two axes swapped after reshaping.
"""
function load_x(df)
    # One column per sample after the transpose, so `reshape` can fold each
    # sample into a 28×28×1 slab.
    pixels = permutedims(Matrix(select(df, Not(:label))))
    images = reshape(pixels, 28, 28, 1, :) / 255
    return permutedims(convert(Array{Float32,4}, images), (2, 1, 3, 4))
end

# Image batches for each split, moved through `gpu`.
x_train = gpu(load_x(df_train))
x_valid = gpu(load_x(df_valid));
x_test = gpu(load_x(df_test));

# Custom training function

In [11]:
"""
    train!(loss, ps, data, opt)

Run one pass over `data`, updating the parameters `ps` with optimiser `opt`.

For every element of `data` the element is splatted into `loss`, the gradient
with respect to `ps` is taken, and `Flux.update!` applies it.

Returns the loss computed for the last element of `data`. If `data` yields
nothing, `training_loss` is never assigned and reading it at the end raises
an `UndefVarError`.
"""
function train!(loss, ps, data, opt)
    local training_loss
    for batch in data
        grads = gradient(ps) do
            # Assigning to the outer local keeps the loss value available
            # after the gradient call.
            training_loss = loss(batch...)
        end
        # Hook point: code that needs `training_loss` (e.g. logging) goes here.
        Flux.update!(opt, ps, grads)
        # Hook point: validation checks / early stopping go here.
    end
    return training_loss
end;

In [12]:
# Name of the class the global `model` predicts for a single image `x`.
# `cat(...; dims=4)` gives the image a fourth (batch) dimension first.
function predict(x)
    batch = cat(x; dims=4)
    scores = model(batch)
    return labels[Flux.onecold(scores, 0:9)[1]]
end

# Name of the class encoded by the one-hot (or score) vector `y`.
function get_label(y)
    class_id = Flux.onecold(y, 0:9)[1]
    return labels[class_id]
end

"""
    get_acc(model)

Return a function `acc(x, y)` that computes the fraction of samples for which
`Flux.onecold(model(x))` equals `Flux.onecold(y)`.
"""
function get_acc(model)
    function acc(x, y)
        predicted = Flux.onecold(model(x))
        actual = Flux.onecold(y)
        return sum(predicted .== actual) / length(actual)
    end
    return acc
end;

# CNN

In [13]:
# Convolutional classifier: two 3×3 convolutions with an adaptive max-pool to
# 14×14 in between, dropout, then a small dense head ending in a softmax.
model = gpu(
    Chain(
        Conv((3, 3), 1 => 32, relu),
        AdaptiveMaxPool((14, 14)),
        Conv((3, 3), 32 => 32, relu),
        Flux.Dropout(0.2),
        Flux.flatten,
        # 4608 = 12 * 12 * 32, assuming the second convolution is unpadded.
        Dense(4608, 32, relu),
        Dense(32, 10),
        softmax,
    ),
)

# Trainable parameters handed to `train!`.
ps = params(model)

# Mean squared error between the model output and the targets.
function loss(x, y)
    return Flux.mse(model(x), y)
end

opt = ADAM()

# Shuffled mini-batches of 512 training samples.
data = gpu(Flux.DataLoader((x_train, y_train); batchsize=512, shuffle=true));

In [14]:
# Loss histories, one entry per epoch. The containers are typed so the
# history is a concrete `Vector{Float32}` rather than a `Vector{Any}`.
tre = Float32[]
# vde = Float32[]
# tse = Float32[]
# ats = Float64[]
cnn_acc = get_acc(model)
@time for epoch in 1:75
    # `train!` returns the loss of the last mini-batch of the epoch.
    push!(tre, train!(loss, ps, data, opt))
    # push!(vde, loss(x_valid, y_valid))
    # push!(tse, loss(x_test, y_test))
    # push!(ats, cnn_acc(x_test, y_test))
end
plot(
    plot(tre; title="Training error"),
    # plot(vde; title="Validation error"),
    # plot(tse; title="Test error"),
    # plot(ats; title="Test accuracy"),
    legend=false,
)

337.132226 seconds (222.35 M allocations: 12.501 GiB, 1.63% gc time, 25.20% compilation time)


## Evaluation

In [15]:
# Fraction of test samples the trained CNN classifies correctly.
cnn_acc(x_test, y_test)

0.8965

In [34]:
# Show one test image with its true label (y) and the CNN's prediction (ŷ).
idx = 21
xi = x_test[:, :, 1, idx]
ŷi = predict(xi)
yi = get_label(y_test[:, idx])
plot(Gray.(xi); title="y=$(yi); ŷ=$ŷi")

# Autoencoder

In [17]:
# Training pixels as a Float32 matrix with one column per sample, scaled by
# 1/255 and moved through `gpu`.
x_train_autoenc = gpu(
    convert(Matrix{Float32}, permutedims(Matrix(select(df_train, Not(:label)))) / 255),
);

In [18]:
# Test pixels as a Float32 matrix with one column per sample, scaled by
# 1/255 and moved through `gpu`.
x_test_autoenc = gpu(
    convert(Matrix{Float32}, permutedims(Matrix(select(df_test, Not(:label)))) / 255),
);

## Compression and decompression

In [19]:
# Encoder: 784 inputs squeezed down to a 16-unit code.
enc = gpu(
    Chain(
        Dense(784, 1024, relu),
        Dense(1024, 256, relu),
        Dense(256, 16, relu),
    ),
)

# Bottleneck layer: 16 -> 16 with a sigmoid activation.
knw = gpu(Chain(Dense(16, 16, σ)))

# Decoder: mirrors the encoder, expanding the 16-unit code back to 784 values.
dec = gpu(
    Chain(
        Dense(16, 256, relu),
        Dense(256, 1024, relu),
        Dense(1024, 784, relu),
    ),
)

# Full autoencoder built from the three stages above.
auto = gpu(Chain(enc, knw, dec));

## Classification

In [20]:
# Classifier head operating on the 16-unit bottleneck code.
clf = gpu(
    Chain(
        Dense(16, 32, relu),
        Dense(32, 10),
        softmax,
    ),
)

# End-to-end classifier: the autoencoder's `enc` and `knw` stages followed by
# `clf`.
m2 = gpu(Chain(enc, knw, clf));

In [21]:
# Shuffled mini-batches of 1024 samples; input and target are the same matrix
# because the autoencoder is trained to reproduce its input.
data_autoenc = gpu(
    Flux.DataLoader((x_train_autoenc, x_train_autoenc); batchsize=1024, shuffle=true),
);

In [22]:
# Mean squared error between the autoencoder's output for `x` and `y`.
function loss_autoenc(x, y)
    return Flux.mse(auto(x), y)
end

loss_autoenc (generic function with 1 method)

In [23]:
# Build the optimiser and the parameter collection once, outside the loop.
# Constructing a fresh `ADAM()` for every epoch would throw away the
# optimiser's accumulated state at each epoch boundary.
opt_autoenc = ADAM()
ps_autoenc = params(auto)

# Per-epoch history of the loss returned by `train!` (last mini-batch of the
# epoch); typed so it is not a `Vector{Any}`.
h = Float32[]
@time for e in 1:200
    push!(h, train!(loss_autoenc, ps_autoenc, data_autoenc, opt_autoenc))
end;

 99.842471 seconds (46.24 M allocations: 3.039 GiB, 1.93% gc time, 2.70% compilation time)


In [24]:
# Plot the autoencoder loss recorded after each epoch (last mini-batch value).
h |> plot

In [25]:
# Push one test sample through each autoencoder stage and show every
# intermediate result as a greyscale image. The 784-value vectors are viewed
# as 28×28 and the 16-value codes as 4×4, each transposed for display.
og = x_test_autoenc[:, 97]
en = enc(og)
kn = knw(en)
dc = dec(kn)
plot(
    plot(Gray.(reshape(og, 28, 28)'); title="Original"),
    plot(Gray.(reshape(en, 4, 4)'); title="Encoded"),
    plot(Gray.(reshape(kn, 4, 4)'); title="Knowledge"),
    plot(Gray.(reshape(dc, 28, 28)'); title="Decoded"),
)

In [26]:
# The bottleneck codes of the training set are computed once here, so the
# classifier head trains on fixed features. Batches of 1024, shuffled.
data_autoenc_classif = gpu(
    Flux.DataLoader(
        (x_train_autoenc |> enc |> knw, y_train); batchsize=1024, shuffle=true
    ),
);

In [27]:
# Mean squared error between the classifier head's output for `x` and `y`.
function loss_autoenc_classif(x, y)
    return Flux.mse(clf(x), y)
end

loss_autoenc_classif (generic function with 1 method)

In [28]:
# Build the optimiser and the parameter collection once, outside the loop.
# Constructing a fresh `ADAM()` for every epoch would throw away the
# optimiser's accumulated state at each epoch boundary.
opt_clf = ADAM()
ps_clf = params(clf)

# Per-epoch history of the loss returned by `train!` (last mini-batch of the
# epoch); typed so it is not a `Vector{Any}`.
hc = Float32[]
@time for e in 1:200
    push!(hc, train!(loss_autoenc_classif, ps_clf, data_autoenc_classif, opt_clf))
end;

  7.776654 seconds (13.69 M allocations: 922.156 MiB, 3.37% gc time, 4.52% compilation time)


In [29]:
# Plot the classifier-head loss recorded after each epoch (last mini-batch value).
hc |> plot

## Evaluation

In [30]:
# Bottleneck codes of the test set: encoder followed by the `knw` layer.
compressed_x = knw(enc(x_test_autoenc))

16×10000 CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}:
 0.191268  0.16485    0.256758  0.129981   …  0.703682   0.116      0.398914
 0.207988  0.176093   0.178108  0.0219824     0.189318   0.0281992  0.0315524
 0.68583   0.703181   0.231324  0.533245      0.819918   0.52743    0.702859
 0.754912  0.0298277  0.326331  0.22199       0.522159   0.191355   0.332208
 0.108232  0.121012   0.283919  0.273609      0.282766   0.216935   0.551678
 0.428722  0.495663   0.305458  0.388488   …  0.755866   0.8729     0.549312
 0.145977  0.379211   0.334953  0.368884      0.255474   0.0787326  0.0859922
 0.324846  0.504652   0.584936  0.860945      0.732701   0.273433   0.865057
 0.944839  0.83536    0.482264  0.714912      0.573658   0.826575   0.858464
 0.295016  0.855133   0.19923   0.246863      0.940521   0.304079   0.192003
 0.11007   0.105242   0.277313  0.128769   …  0.889754   0.172528   0.061751
 0.340552  0.498898   0.234245  0.237737      0.833262   0.100896   0.294571
 0.858741  0.0113136 

In [31]:
# Predicted class per test sample from the classifier head.
# NOTE(review): `Flux.onecold` is called without a label set here, so the
# values are presumably positional indices rather than the 0-9 ids — confirm.
autoenc_preds = compressed_x |> clf |> Flux.onecold;

In [32]:
# Accuracy of the classifier head on the test set's bottleneck codes.
accuracy_autoenc = get_acc(clf)
accuracy_autoenc(compressed_x, y_test)

0.8291