In [1]:
using Pkg
Pkg.activate(".")

using Plots, HypertextLiteral, Random
include("../src/load.jl") # load datasets
include("../src/surrogate.jl")
include("../src/run.jl") # run tests

[32m[1m  Activating[22m[39m project at `~/Research/SurrogateDistanceModels/notebooks`


test_searchgraph (generic function with 2 methods)

In [2]:
"""
    run_experiment(D, k; kscalelist, npairslist, npoolslist, ssizelist)

Run the baseline searches on dataset `D` and then repeat them on every
surrogate encoding produced from the keyword grids.

Steps, in order:

1. Store `k` and a zero `"enctime"` in `D.params` (this mutates `D.params`).
2. Compute the reference result `Gold` with `test_exhaustive` on the original
   data, then call `test_searchgraph` twice against it (once with its default
   arguments, once with an extra positional `0.6`).
3. For each value in `kscalelist`, create one `BinaryHammingSurrogate` per entry
   of `npairslist` and one `MaxHashSurrogate` per `(ssize, npools)` combination
   of `ssizelist` and `npoolslist`.
4. For each surrogate, time `encode`, store the elapsed seconds under
   `"enctime"` in the encoded dataset's params, and run `test_exhaustive` and
   `test_searchgraph` on the encoded data requesting `k * kscale(E)` results.

# Keywords
- `kscalelist`: scale values passed to both surrogate constructors.
- `npairslist`: second argument of `BinaryHammingSurrogate`.
- `npoolslist`: second argument of `MaxHashSurrogate`.
- `ssizelist`: first argument of `MaxHashSurrogate`.
"""
function run_experiment(D, k;
        kscalelist=[1, 8],
        npairslist=[256, 512, 1024, 2048],
        npoolslist=[32, 64, 128, 256],
        ssizelist=[4, 8, 16]
    )
    # Record the experiment settings on the dataset itself; the test helpers
    # below always receive a copy of these params.
    D.params["k"] = k
    D.params["enctime"] = 0.0

    # Baselines on the original (non-encoded) data.
    Gold = test_exhaustive(nothing, D.db, D.queries, D.dist, copy(D.params), k)
    test_searchgraph(Gold, D.db, D.queries, D.dist, copy(D.params), k)
    test_searchgraph(Gold, D.db, D.queries, D.dist, copy(D.params), k, 0.6)

    dim = length(D.db[1])
    surrogates = []
    # The loop variable is called `scale` so it cannot be confused with the
    # `kscale(E)` accessor used further down.
    for scale in kscalelist
        # (A `BinaryHammingFixedPairs(scale)` surrogate used to be added here;
        # it is currently disabled.)
        append!(
            surrogates,
            (BinaryHammingSurrogate(scale, npairs, dim) for npairs in npairslist),
        )
        # `ssize` is the outer loop and `npools` the inner one.
        append!(
            surrogates,
            (
                MaxHashSurrogate(ssize, npools, dim, scale)
                for ssize in ssizelist for npools in npoolslist
            ),
        )
    end

    for E in surrogates
        # Time only the encoding step; `H` is the encoded dataset.
        enctime = @elapsed H = encode(E, D.db, D.queries, copy(D.params))
        H.params["enctime"] = enctime
        # Searches on the encoded data request `k * kscale(E)` results.
        test_exhaustive(
            Gold, H.db, H.queries, H.dist, copy(H.params), k * kscale(E)
        )
        test_searchgraph(
            Gold, H.db, H.queries, H.dist, copy(H.params), k * kscale(E), 0
        )
    end
end

run_experiment (generic function with 1 method)

In [3]:
# Number of neighbors passed to `run_experiment` for every dataset.
k = 32

# Run the same experiment on each dataset, one after another. The `do` block
# is a hard scope, so `D` is a fresh local for every loader and never touches
# a global `D`.
foreach((load_glove_400k, load_wit_300k, load_glove_1m, load_bigann_1m)) do loader
    D = loader()
    @show size(D.db.matrix), D.dist
    run_experiment(D, k)
end


(size(D.db.matrix), D.dist) = ((100, 390000), NormalizedCosineDistance())
(size(D.db.matrix), D.dist) = ((512, 308374), NormalizedCosineDistance())
(size(D.db.matrix), D.dist) = ((100, 1183514), NormalizedCosineDistance())
(size(D.db.matrix), D.dist) = ((128, 1000000), SqL2Distance())
