Skip to content

Commit

Permalink
adds Polyester; support for SimilaritySearch 0.9
Browse files Browse the repository at this point in the history
  • Loading branch information
sadit committed Jun 17, 2022
1 parent b0f086c commit 8d60bc8
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 20 deletions.
6 changes: 4 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
name = "KCenters"
uuid = "5d8de97f-65f8-4dd6-a15b-0f89c36a43ce"
authors = ["Eric S. Tellez <donsadit@gmail.com>"]
version = "0.6.1"
version = "0.7.0"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MLDataUtils = "cc2ba9b6-d476-5e6d-8eaf-a92d5412d41d"
Polyester = "f517fe37-dbe3-4b94-8317-1923a5111588"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SimilaritySearch = "053f045d-5466-53fd-b400-a066f88fe02a"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
Expand All @@ -18,6 +19,7 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
CategoricalArrays = "0.8, 0.9, 0.10"
Distances = "0.10"
MLDataUtils = "0.5"
SimilaritySearch = "0.8.15"
SimilaritySearch = "0.9"
StatsBase = "0.32, 0.33"
Polyester = "0.6"
julia = "1.6"
3 changes: 2 additions & 1 deletion src/KCenters.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# This file is a part of KCenters.jl

module KCenters
using SimilaritySearch
using SimilaritySearch, Polyester
using SimilaritySearch: getminbatch, getknnresult, getpools

include("criterions.jl")
include("centerselection.jl")
Expand Down
14 changes: 7 additions & 7 deletions src/clustering.jl
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,7 @@ function kcenters_(dist::SemiMetric, X::AbstractDatabase, C::AbstractDatabase; s
end

verbose && println(stderr, "*** computing centroids ***")

Threads.@threads for i in 1:length(clusters)
@batch minbatch=getminbatch(0, length(clusters)) for i in 1:length(clusters)
plist = clusters[i]
# CC[i] can be empty because we could be using approximate search
if length(plist) > 0
Expand All @@ -153,11 +152,12 @@ function kcenters_(dist::SemiMetric, X::AbstractDatabase, C::AbstractDatabase; s
ClusteringData(CC, freqs, compute_dmax(numcenters, codes, distances), codes, distances, err)
end

function associate_centroids_and_compute_error!(X, index::AbstractSearchContext, codes, distances, counters)
pools = SimilaritySearch.getpools(index)
Threads.@threads for objID in 1:length(X)
res = SimilaritySearch.getknnresult(1, pools)
search(index, X[objID], res)
function associate_centroids_and_compute_error!(X, index::AbstractSearchIndex, codes, distances, counters)
pools = getpools(index)

@batch minbatch=getminbatch(0, length(X)) for objID in 1:length(X)
res = getknnresult(1, pools)
search(index, X[objID], res; pools)
codes[objID] = argmin(res)
distances[objID] = maximum(res)
end
Expand Down
2 changes: 1 addition & 1 deletion src/enet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ function enet(dist::SemiMetric, X::AbstractDatabase, stop::Function; verbose=tru
verbose && println(stderr, "computing fartest point $(length(imaxlist)), dmax: $dmax, imax: $imax, n: $(length(X))")

@inbounds pivot = X[imax]
Threads.@threads for i in 1:N
@batch minbatch=getminbatch(0, N) for i in 1:N
@inbounds d = evaluate(dist, X[i], pivot)
@inbounds nndist[i] = min(nndist[i], d)
end
Expand Down
18 changes: 9 additions & 9 deletions src/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
export partition, knr, sequence, invindex

"""
partition(callback::Function, objects::AbstractVector{T}, refs::AbstractSearchContext; k::Int=1) where T
partition(callback::Function, objects::AbstractVector{T}, refs::AbstractSearchIndex; k::Int=1) where T
Groups items in `objects` using a nearest neighbor rule over `refs`.
The output is controlled using a callback function. The call is performed in `objects` order.
Expand All @@ -19,7 +19,7 @@ The output is controlled using a callback function. The call is performed in `ob
Please note that each object can be related to more than one group when ``k > 1`` (default ``k=1``).
"""
function partition(callback::Function, objects::AbstractVector{T}, refs::AbstractSearchContext; k::Int=1) where T
function partition(callback::Function, objects::AbstractVector{T}, refs::AbstractSearchIndex; k::Int=1) where T
res = KnnResult(k)
for i in 1:length(objects)
empty!(res)
Expand All @@ -28,15 +28,15 @@ function partition(callback::Function, objects::AbstractVector{T}, refs::Abstrac
end

"""
invindex(objects::AbstractVector{T}, refs::AbstractSearchContext; k::Int=1) where T
invindex(objects::AbstractVector{T}, refs::AbstractSearchIndex; k::Int=1) where T
Creates an inverted index from references to objects.
So, an object ``u`` is in ``r``'s posting list iff ``r``
is among the ``k`` nearest references of ``u``.
"""
function invindex(objects::AbstractVector{T}, refs::AbstractSearchContext; k::Int=1) where T
π = [Vector{Int}() for i in 1:length(refs.db)]
function invindex(objects::AbstractVector{T}, refs::AbstractSearchIndex; k::Int=1) where T
π = [Vector{Int}() for _ in 1:length(refs.db)]
# partition((i, p) -> push!(π[p.id], i), dist, objects, refs, k=k)
partition(objects, refs, k=k) do i, res
for p in res
Expand All @@ -47,11 +47,11 @@ function invindex(objects::AbstractVector{T}, refs::AbstractSearchContext; k::In
end

"""
sequence(objects::AbstractVector{T}, refs::AbstractSearchContext) where T
sequence(objects::AbstractVector{T}, refs::AbstractSearchIndex) where T
Computes the nearest reference of each item in the dataset and returns it as a sequence of identifiers
"""
function sequence(objects::AbstractVector{T}, refs::AbstractSearchContext) where T
function sequence(objects::AbstractVector{T}, refs::AbstractSearchIndex) where T
s = Vector{Int}(undef, length(objects))
partition(objects, refs) do i, res
s[i] = first(res).id
Expand All @@ -60,11 +60,11 @@ function sequence(objects::AbstractVector{T}, refs::AbstractSearchContext) where
end

"""
knr(objects::AbstractVector{T}, refs::AbstractSearchContext) where T
knr(objects::AbstractVector{T}, refs::AbstractSearchIndex) where T
Computes an array of k-nearest neighbors for `objects`
"""
function knr(objects::AbstractVector{T}, refs::AbstractSearchContext) where T
function knr(objects::AbstractVector{T}, refs::AbstractSearchIndex) where T
s = Vector{Vector{Int}}(undef, length(objects))
partition(objects, refs) do i, res
s[i] = [p.id for p in res]
Expand Down

2 comments on commit 8d60bc8

@sadit
Copy link
Owner Author

@sadit sadit commented on 8d60bc8 Jun 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/62568

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.7.0 -m "<description of version>" 8d60bc8dccb88e4708c32f13b8925f45ea35fb95
git push origin v0.7.0

Please sign in to comment.