In [1]:
using Pkg
pkg"activate ."
#pkg"add https://github.com/sadit/KCenters.jl SimilaritySearch MLDatasets MLDataUtils LinearAlgebra Images StatsBase JSON"
using KCenters, SimilaritySearch, MLDatasets, MLDataUtils, LinearAlgebra, Images, StatsBase, JSON

[32m[1mActivating[22m[39m environment at `~/Research/KCenters.jl/tutorials/Project.toml`
[32m[1m  Updating[22m[39m registry at `~/.julia/registries/General`
[32m[1m  Updating[22m[39m git-repo `https://github.com/JuliaRegistries/General.git`
[?25l[2K[?25h[32m[1m Resolving[22m[39m package versions...
[32m[1m  Updating[22m[39m `~/Research/KCenters.jl/tutorials/Project.toml`
 [90m [5d8de97f][39m[93m ~ KCenters v0.1.6 #master (https://github.com/sadit/KCenters.jl)[39m
[32m[1m  Updating[22m[39m `~/Research/KCenters.jl/tutorials/Manifest.toml`
 [90m [5d8de97f][39m[93m ~ KCenters v0.1.6 #master (https://github.com/sadit/KCenters.jl)[39m


┌ Info: Precompiling KCenters [5d8de97f-65f8-4dd6-a15b-0f89c36a43ce]
└ @ Base loading.jl:1273


In [24]:
"""
    normalize_vector(X, i)

Copies, converts to a vector of 28*28 dimensions and normalizes it
"""
function normalize_vector(X, i)
    v = @view X[:, :, i]
    normalize!(Float32.(reshape(v, 784)))
    #Float32.(reshape(v, 784))
end

"""
    load_data()

Loads the MNIST benchmark and converts the dataset to individual vectors; labels are also encoded as integers
"""
function load_data()
    train_X, train_y = MNIST.traindata()
    test_X, test_y = MNIST.testdata()
    le = labelenc(train_y)
    
    # KCenters work with collections of vectors more than concatenation of them
    X1 = [normalize_vector(train_X, c) for c in 1:size(train_X, 3)]
    X2 = [normalize_vector(test_X, c) for c in 1:size(test_X, 3)]

    X1, label2ind.(train_y, le), X2, label2ind.(test_y, le), le
end




load_data

In [7]:
"""
    display_examples(P, train_X, s=7)

Display randomly chosen centers of `P` next to randomly chosen members of their lists.

Up to `s` distinct centers are drawn from `P.nc.centers`. For each drawn center `c`, up
to `s` distinct entries of `P.index.lists[c]` are used to index `train_X`. The center and
those examples are reshaped to 28×28, transposed, concatenated horizontally and shown as
one grayscale image.
"""
function display_examples(P, train_X, s=7)
    as_image(flat) = reshape(flat, (28, 28))'
    centers = P.nc.centers
    chosen = unique(rand(1:length(centers), s))

    foreach(chosen) do c
        members = unique(rand(P.index.lists[c], s))
        strip = hcat(as_image(centers[c]), as_image.(train_X[members])...)
        display(Gray.(strip))
    end
end

display_examples (generic function with 2 methods)

In [25]:
# Serialize functions through `string`, so that configurations holding functions
# (such as the kernel and distance entries) can be written with `JSON.json`.
# NOTE(review): neither `JSON.lower` nor `Function` is defined in this notebook, so this
# method is visible to every user of JSON in the session.
function JSON.lower(f::Function)
    return string(f)
end

"""
    run_test(train_X, train_y, test_X, test_y; verbose=true, kwargs...)

Search for an `AKNC` configuration on the training data, fit the selected configuration
and score its predictions on the test data.

`search_params` is called with a fixed set of search options; any extra `kwargs` are
forwarded to it after those options. Every `(config, score)` entry of the returned list
is printed to `stderr` as JSON. The first entry of that list (assumed to be the best
scoring one — confirm against `search_params`) is fitted on the full training data, and
the resulting scores are also printed to `stderr`.

# Arguments
- `train_X`, `train_y`: training examples and their labels.
- `test_X`, `test_y`: test examples and their labels.

# Keywords
- `verbose=true`: passed to both `search_params` and `fit`.
- `kwargs...`: additional keyword arguments for `search_params`.

# Returns
The tuple `(model, s)`: the fitted model and the output of `scores(test_y, ypred)`.
"""
function run_test(train_X, train_y, test_X, test_y; verbose=true, kwargs...)
    best_list = search_params(AKNC, train_X, train_y, 32;
        bsize=8, mutation_bsize=3, ssize=8, folds=0.7, search_maxiters=8,
        score=:accuracy, tol=0.01, verbose=verbose,
        kernel=[relu_kernel, direct_kernel],
        kwargs...)

    for (i, c) in enumerate(best_list)
        println(stderr, i, ", score=", c[2], ", config=", JSON.json(c[1]))
    end

    # Only the configuration of the first entry is needed; its score was printed above.
    config, _ = best_list[1]
    model = fit(config, train_X, train_y, verbose=verbose)
    ypred = predict(model, test_X)
    s = scores(test_y, ypred)
    println(stderr, JSON.json(s, 2))
    return model, s
end

# Load the data once at notebook scope; the vectors, labels and the label encoder `le`
# are reused by the cells below.
train_X, train_y, test_X, test_y, le = load_data()

# First run: `ncenters` is limited to `[0]`, while both the L2 and the cosine distance
# are offered to the search. The trailing `;` hides the returned value.
run_test(
    train_X, train_y, test_X, test_y;
    ncenters=[0],
    dist=[l2_distance, cosine_distance],
);

iteration 1 finished
generating 8 configurations using top 8 configurations, starting with 4)
[0.819, 0.7754444444444445, 0.7557222222222222, 0.7414444444444445]
AKNC_Config(KCenters.direct_kernel, SimilaritySearch.l2_distance, Statistics.mean, 1, 0, 0, 1.0, :rand, 0.0, 1) => 0.819
finished with 4
iteration 2 finished
stopping on iteration 2 due to a possible convergence (0.819 ≃ 0.819, tol: 0.01)
1, score=0.819, config={"kernel":"direct_kernel","dist":"l2_distance","centroid":"mean","k":1,"ncenters":0,"maxiters":0,"recall":1.0,"initial_clusters":"rand","split_entropy":0.0,"minimum_elements_per_centroid":1}
2, score=0.7754444444444445, config={"kernel":"relu_kernel","dist":"l2_distance","centroid":"mean","k":1,"ncenters":0,"maxiters":0,"recall":1.0,"initial_clusters":"rand","split_entropy":0.0,"minimum_elements_per_centroid":1}
3, score=0.7557222222222222, config={"kernel":"direct_kernel","dist":"cosine_distance","centroid":"mean","k":1,"ncenters":0,"maxiters":0,"recall":1.0,"initial_c

In [17]:
# Second run: every option passed here is a single-element list; only the kernel list
# fixed inside `run_test` still has two entries. The fitted model and its scores are
# kept in `model` and `s` for the following cells.
model, s = run_test(
    train_X, train_y, test_X, test_y,
    verbose=false,
    ncenters=[100],
    dist=[l2_distance],
    k=[1],
    split_entropy=[0.3],
    initial_clusters=[:fft],
    minimum_elements_per_centroid=[3],
    maxiters=[3],
)

1, score=0.9346666666666666, config={"kernel":"direct_kernel","dist":"l2_distance","centroid":"mean","k":1,"ncenters":100,"maxiters":3,"recall":1.0,"initial_clusters":"fft","split_entropy":0.3,"minimum_elements_per_centroid":3}
2, score=0.3373888888888889, config={"kernel":"relu_kernel","dist":"l2_distance","centroid":"mean","k":1,"ncenters":100,"maxiters":3,"recall":1.0,"initial_clusters":"fft","split_entropy":0.3,"minimum_elements_per_centroid":3}
{
  "micro_f1": 0.9316,
  "precision": 0.9316,
  "recall": 0.9316,
  "macro_recall": 0.930948284365235,
  "macro_f1": 0.9310572469264009,
  "accuracy": 0.9316,
  "class_f1": {
    "7": 0.9188921859545005,
    "9": 0.9102691924227319,
    "4": 0.9707551287647316,
    "10": 0.9219143576826196,
    "2": 0.9699745547073791,
    "3": 0.9160696008188333,
    "5": 0.8927875243664718,
    "8": 0.9592152813629323,
    "6": 0.9345063538611926,
    "1": 0.9161882893226178
  },
  "class_precision": {
    "7": 0.9179841897233202,
    "9": 0.933537832310

(AKNC{Array{Float32,1}}(KNC{Array{Float32,1}}(Array{Float32,1}[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0  …  0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0

In [23]:
# Turn a flat vector into a transposed 28×28 grayscale image.
f(x) = Gray.(reshape(x, (28, 28))')

# Show up to 7 distinct, randomly drawn centers of the fitted model; each image is
# preceded by a line with the center index and the label decoded through `le`.
foreach(unique(rand(1:length(model.nc.centers), 7))) do idx
    label = ind2label(model.nc.class_map[idx], le)
    display("$idx => $(label)")
    display(f(model.nc.centers[idx]))
end

UndefVarError: UndefVarError: le not defined