In [1]:
# Activate the project found at "." so the notebook uses that environment's packages.
using Pkg
Pkg.activate(".")

# Packages used by this notebook.
using Plots, HypertextLiteral, Random
# Project source files, pulled in by relative path.
include("../src/load.jl") # load datasets
# NOTE(review): presumably defines the surrogate encoders used below — confirm.
include("../src/surrogate.jl")
include("../src/run.jl") # run tests

[32m[1m  Activating[22m[39m project at `~/Research/SurrogateDistanceModels/notebooks`
┌ Info: Precompiling SimilaritySearch [053f045d-5466-53fd-b400-a066f88fe02a]
└ @ Base loading.jl:1662


test_searchgraph (generic function with 2 methods)

In [2]:
# Report how many threads this Julia session was started with.
Threads.nthreads()

64

In [3]:
]status

[32m[1mStatus[22m[39m `~/Research/SurrogateDistanceModels/notebooks/Project.toml`
 [90m [336ed68f] [39mCSV v0.10.4
 [90m [a93c6f00] [39mDataFrames v1.3.4
 [90m [0b91fe84] [39mDisplayAs v0.1.6
 [90m [c5bfea45] [39mEmbeddings v0.4.2
 [90m [c27321d9] [39mGlob v1.3.0
 [90m [f67ccb44] [39mHDF5 v0.16.11
 [90m [ac1192a8] [39mHypertextLiteral v0.9.4
 [90m [033835bb] [39mJLD2 v0.4.22
 [90m [682c06a0] [39mJSON v0.21.3
[32m⌃[39m[90m [91a5bcdd] [39mPlots v1.31.5
 [90m [612083be] [39mQueryverse v0.7.0
 [90m [ca7ab67e] [39mSimSearchManifoldLearning v0.2.5
 [90m [053f045d] [39mSimilaritySearch v0.9.4 `../../SimilaritySearch.jl`
 [90m [f3b207a7] [39mStatsPlots v0.15.1
[36m[1mInfo[22m[39m Packages marked with [32m⌃[39m have new versions available


In [6]:
"""
    run_experiment(D, k; kscalelist, npairslist, npoolslist, ssizelist,
                   topklist, npermslist, permsizelist)

Run the baseline searches on dataset `D`, then encode `D` with a grid of surrogate
models and run the same kind of searches on every encoded version.

`D` must provide `db`, `queries`, `dist` and a mutable `params` dictionary; the
entries `"k"` and `"enctime"` of `D.params` are overwritten.

The first `test_exhaustive` call (made with `nothing` in the reference slot) produces
the object that is handed to every later test as its first argument — presumably the
gold-standard results; confirm in `run.jl`.

# Keywords
- `kscalelist`: every value is passed as `kscale` to all surrogate constructors.
- `npairslist`: `npairs` values for `BinaryHammingSurrogate`.
- `npermslist`, `permsizelist`: `nperms` / `permsize` values for `Perms` and `BinPerms`
  (`BinPerms` additionally receives `ceil(Int, permsize / 3)` as its first argument).
- `ssizelist`, `npoolslist`: `ssize` / `npools` values for `MaxHashSurrogate`.
- `topklist`: accepted but currently unused; the top-k surrogates are disabled.

Returns `nothing`.
"""
function run_experiment(D, k;
        kscalelist=[1, 8, 16],
        npairslist=[256, 512, 1024, 2048],
        npoolslist=[32, 64, 128, 256],
        ssizelist=[4, 8, 16],
        topklist=[15, 31, 63],
        npermslist=[4, 8, 16],
        permsizelist=[64]
    )
    # Baseline runs on the original (unencoded) data.
    D.params["k"] = k
    D.params["enctime"] = 0.0
    gold = test_exhaustive(nothing, D.db, D.queries, D.dist, copy(D.params), k)
    test_searchgraph(gold, D.db, D.queries, D.dist, copy(D.params), k)
    test_searchgraph(gold, D.db, D.queries, D.dist, copy(D.params), k, 0.6)

    # Build the grid of encoders. Construction order is kept stable because it
    # determines the order in which the experiments below are executed.
    dim = length(D.db[1])
    encoders = Any[]
    for ks in kscalelist
        for npairs in npairslist
            push!(encoders, BinaryHammingSurrogate(ks, npairs, dim))
        end

        for nperms in npermslist
            for permsize in permsizelist
                push!(encoders, Perms(permsize, nperms, dim, ks))
                shift = ceil(Int, permsize / 3)
                push!(encoders, BinPerms(shift, permsize, nperms, dim, ks))
            end
        end

        for ssize in ssizelist
            for npools in npoolslist
                push!(encoders, MaxHashSurrogate(ssize, npools, dim, ks))
            end
        end

        # Disabled variants: BinaryHammingFixedPairs(ks), and for each topk in
        # topklist TopKSurrogate(topk, dim, ks) / SmoothedTopK(topk, dim, ks).
    end

    # Encode the dataset with each encoder, record the encoding time, and search
    # the encoded data asking for `k * kscale(encoder)` neighbors.
    for encoder in encoders
        elapsed = @elapsed encoded = encode(encoder, D.db, D.queries, copy(D.params))
        encoded.params["enctime"] = elapsed
        test_exhaustive(
            gold, encoded.db, encoded.queries, encoded.dist,
            copy(encoded.params), k * kscale(encoder),
        )
        test_searchgraph(
            gold, encoded.db, encoded.queries, encoded.dist,
            copy(encoded.params), k * kscale(encoder), 0,
        )
    end

    return nothing
end

run_experiment (generic function with 1 method)

In [None]:
# Number of neighbors requested in every experiment.
k = 32

# Run the full experiment on each benchmark dataset, one after another. Each dataset
# lives only inside its own `let` block so it can be released before the next load.
for loader in (load_glove_400k, load_wit_300k, load_glove_1m, load_bigann_1m)
    let D = loader()
        @show size(D.db.matrix), D.dist
        run_experiment(D, k)
    end
end


(size(D.db.matrix), D.dist) = ((100, 390000), NormalizedCosineDistance())
(:Perms, permsize, nperms, dim, kscale) = (:Perms, 64, 4, 100, 1)
(shift, permsize, nperms, dim, kscale) = (22, 64, 4, 100, 1)
(:Perms, permsize, nperms, dim, kscale) = (:Perms, 64, 8, 100, 1)
(shift, permsize, nperms, dim, kscale) = (22, 64, 8, 100, 1)
(:Perms, permsize, nperms, dim, kscale) = (:Perms, 64, 16, 100, 1)
(shift, permsize, nperms, dim, kscale) = (22, 64, 16, 100, 1)
(:Perms, permsize, nperms, dim, kscale) = (:Perms, 64, 4, 100, 8)
(shift, permsize, nperms, dim, kscale) = (22, 64, 4, 100, 8)
(:Perms, permsize, nperms, dim, kscale) = (:Perms, 64, 8, 100, 8)
(shift, permsize, nperms, dim, kscale) = (22, 64, 8, 100, 8)
(:Perms, permsize, nperms, dim, kscale) = (:Perms, 64, 16, 100, 8)
(shift, permsize, nperms, dim, kscale) = (22, 64, 16, 100, 8)
(:Perms, permsize, nperms, dim, kscale) = (:Perms, 64, 4, 100, 16)
(shift, permsize, nperms, dim, kscale) = (22, 64, 4, 100, 16)
(:Perms, permsize, nperms, dim,