Skip to content

Commit

Permalink
Lots of bug fixes and improvements. Had to disable for int32 temporarily
while I sort that out. This also adds the max_k option to radius_search
as discussed in #170.
Browse files Browse the repository at this point in the history
  • Loading branch information
translunar committed Mar 7, 2014
1 parent 466c8e9 commit 7694b11
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 80 deletions.
1 change: 1 addition & 0 deletions src/ruby/flann.gemspec
Expand Up @@ -53,6 +53,7 @@ Gem::Specification.new do |gem|
gem.add_development_dependency 'rake'
gem.add_development_dependency 'bundler'
gem.add_development_dependency 'rspec'
gem.add_development_dependency 'rspec-longrun'
gem.add_development_dependency 'pry'
end

45 changes: 34 additions & 11 deletions src/ruby/lib/flann.rb
Expand Up @@ -37,14 +37,17 @@ module Flann
ffi_lib "libflann"

# Declare enumerators
# Each enum is registered under a tag (:algorithm, :centers_init, ...) so that it
# can be referred to by that tag in attach_function signatures. In Algorithm,
# :saved and :autotuned are pinned to the explicit values 254 and 255.
Algorithm = enum(:algorithm, [:linear, :kdtree, :kmeans, :composite, :kdtree_single, :hierarchical, :lsh, :kdtree_cuda, :saved, 254, :autotuned, 255])
CentersInit = enum(:centers_init, [:random, :gonzales, :kmeanspp])
LogLevel = enum(:log_level, [:none, :fatal, :error, :warn, :info, :debug])

# Note that Hamming and beyond are not supported in the C API. We include them here just in case of future improvements.
DistanceType = enum(:distance_type, [:undefined, :euclidean, :l2, :manhattan, :l1, :minkowski, :max, :hist_intersect, :hellinger, :chi_square, :kullback_leibler, :hamming, :hamming_lut, :hamming_popcnt, :l2_simple])

# For NMatrix compatibility
# Aliases from NMatrix dtype names to C types, declared once each.
typedef :float, :float32
typedef :double, :float64
typedef :char, :byte
# Named pointer types that make the attach_function signatures self-describing.
typedef :pointer, :index_params_ptr
typedef :pointer, :index_ptr

Expand Down Expand Up @@ -123,8 +126,10 @@ def dtype_to_c d #:nodoc:
# Allocates index space and distance space for storing results from various searches. For a k-nearest neighbors
# search, for example, you want trows (the number of rows in the testset) times k (the number of nearest neighbors
# being searched for).
#
# Note that c_type will produce float for everything except double, which produces double.
def allocate_results_space result_size, c_type #:nodoc:
  # Distances come back as double only for double data; every other element
  # type is reported as float, so the raw c_type must never be used directly
  # for the distance buffer.
  distance_type = c_type == :double ? :double : :float

  # Returns [indices, distances]: one C int and one distance per result slot.
  [FFI::MemoryPointer.new(:int, result_size), FFI::MemoryPointer.new(distance_type, result_size)]
end


Expand Down Expand Up @@ -175,15 +180,31 @@ def nearest_neighbors dataset, testset, k, parameters = {}
indices_int_ptr, distances_t_ptr, k, parameters_ptr

# Return results: two arrays, one of indices and one of distances.
[indices_int_ptr.read_array_of_int(result_size), distances_t_ptr.read_array_of_float(result_size)]
[indices_int_ptr.read_array_of_int(result_size),
c_type == :double ? distances_t_ptr.read_array_of_double(result_size) : distances_t_ptr.read_array_of_float(result_size)]
end
alias :nn :nearest_neighbors

# Set the distance function to use when computing distances between data points.
# distance_function:: the distance type to hand to flann_set_distance_type.
# order:: optional integer order passed alongside it (presumably only
#         meaningful for :minkowski -- confirm against the FLANN C API).
#         When omitted, the order FLANN currently reports is kept.
#
# Returns self so that calls can be chained.
def set_distance_type! distance_function, order = nil
  order ||= Flann.flann_get_distance_order
  Flann.send(:flann_set_distance_type, distance_function, order)
  self
end
alias :set_distance_type_and_order! :set_distance_type!

# Get the distance type and order
def get_distance_type_and_order
  # Ask FLANN for the type first, then the order, and return them as a pair.
  type = Flann.flann_get_distance_type
  order = Flann.flann_get_distance_order
  [type, order]
end
# Returns the value reported by Flann.flann_get_distance_type, i.e. the
# distance type currently configured in the C library.
def get_distance_type
Flann.flann_get_distance_type
end
# Returns the integer reported by Flann.flann_get_distance_order, i.e. the
# order currently configured in the C library.
def get_distance_order
Flann.flann_get_distance_order
end
alias :distance_type :get_distance_type
alias :distance_order :get_distance_order


# Perform hierarchical clustering of a set of points.
#
Expand Down Expand Up @@ -249,7 +270,9 @@ def cluster dataset, clusters, parameters = {}
attach_function :flann_free_index_float, [:index_ptr, :index_params_ptr], :int
attach_function :flann_free_index_double, [:index_ptr, :index_params_ptr], :int

# Distance configuration. :distance_type is the tag of the DistanceType enum.
attach_function :flann_set_distance_type, [:distance_type, :int], :void
attach_function :flann_get_distance_type, [], :distance_type
attach_function :flann_get_distance_order, [], :int

attach_function :flann_compute_cluster_centers_byte, [:pointer, :int, :int, :int, :pointer, :index_params_ptr], :int
attach_function :flann_compute_cluster_centers_int, [:pointer, :int, :int, :int, :pointer, :index_params_ptr], :int
Expand Down
36 changes: 25 additions & 11 deletions src/ruby/lib/flann/index.rb
Expand Up @@ -29,7 +29,9 @@ class FFI::Pointer
class << self
# Wraps the data buffer of a dense NMatrix in a typed FFI pointer.
#
# Raises StorageError when the matrix is not dense.
def new_from_nmatrix nm
  raise(StorageError, "dense storage expected") unless nm.dense?

  # :byte is mapped to :uchar before the pointer is built (presumably because
  # :byte is only a typedef inside the Flann module -- confirm).
  c_type = Flann::dtype_to_c(nm.dtype)
  c_type = :uchar if c_type == :byte

  # The pointer wraps the matrix's own buffer (nm.data_pointer), so
  # autorelease is switched off on it.
  ::FFI::Pointer.new(c_type, nm.data_pointer).tap { |p| p.autorelease = false }
end
end
end
Expand All @@ -43,9 +45,11 @@ class Index
#
# * https://github.com/mariusmuja/flann/tree/master/src/cpp/flann/algorithms
#
def initialize dataset = nil, dtype: :float64, parameters: Flann::Parameters::DEFAULT
@dataset = dataset
@dtype = (!dataset.nil? && dataset.is_a?(NMatrix)) ? dataset.dtype : dtype
def initialize index_dataset = nil, dtype: :float64, parameters: Flann::Parameters::DEFAULT
@dataset = index_dataset
#require 'pry'
#binding.pry if @dataset.nil?
@dtype = (!index_dataset.nil? && index_dataset.is_a?(NMatrix)) ? index_dataset.dtype : dtype
@index_ptr = nil

@parameters_ptr, @parameters = Flann::handle_parameters(parameters)
Expand All @@ -55,17 +59,24 @@ def initialize dataset = nil, dtype: :float64, parameters: Flann::Parameters::DE
attr_reader :dtype, :dataset, :parameters, :parameters_ptr, :index_ptr

# Assign a new dataset. Requires that the old index be freed.
def dataset= index_dataset
  # Release the existing index via free! before swapping in the new data.
  free!
  @dataset = index_dataset
end

# Build an index
def build!
raise("no dataset specified") if dataset.nil?

c_method = "flann_build_index_#{Flann::dtype_to_c(dtype)}".to_sym
c_type = Flann::dtype_to_c(dtype)
c_method = "flann_build_index_#{c_type}".to_sym
speedup_float_ptr = FFI::MemoryPointer.new(:float)
@index_ptr = Flann.send(c_method, FFI::Pointer.new_from_nmatrix(dataset), dataset.shape[0], dataset.shape[1], speedup_float_ptr, parameters_ptr)
# A null handle means the C call did not produce an index. Fail loudly here;
# library code must not drop into an interactive debugger.
raise("failed to allocate index_ptr") if index_ptr.address == 0


# Return the speedup
speedup_float_ptr.read_float
Expand All @@ -91,12 +102,14 @@ def nearest_neighbors testset, k, parameters = {}
k,
parameters_ptr

[indices_int_ptr.read_array_of_int(result_size), distances_t_ptr.read_array_of_float(result_size)]

[indices_int_ptr.read_array_of_int(result_size),
c_type == :double ? distances_t_ptr.read_array_of_double(result_size) : distances_t_ptr.read_array_of_float(result_size)]
end

# Perform a radius search on a single query point
def radius_search query, radius, parameters = {}
max_k = parameters[:max_neighbors] || dataset.shape[1]
def radius_search query, radius, max_k=nil, parameters = {}
max_k ||= dataset.shape[1]
parameters = Parameters.new(Flann::Parameters::DEFAULT.merge(parameters))

self.build! if index_ptr.nil?
Expand All @@ -109,7 +122,8 @@ def radius_search query, radius, parameters = {}
Flann.send(c_method, index_ptr, FFI::Pointer.new_from_nmatrix(query), indices_int_ptr, distances_t_ptr, max_k, radius, parameters_ptr)

# Return results: two arrays, one of indices and one of distances.
[indices_int_ptr.read_array_of_int(max_k), distances_t_ptr.read_array_of_float(max_k)]
[indices_int_ptr.read_array_of_int(max_k),
c_type == :double ? distances_t_ptr.read_array_of_double(max_k) : distances_t_ptr.read_array_of_float(max_k)]
end

# Save an index to a file (without the dataset).
Expand Down
9 changes: 6 additions & 3 deletions src/ruby/spec/flann_spec.rb
Expand Up @@ -68,15 +68,18 @@

context "#set_distance_type!" do
  it "sets the distance functor without error" do
    # Round trip through the C API: the type that was set must be read back.
    Flann.set_distance_type! :euclidean
    d = Flann.get_distance_type
    expect(d).to eq(:euclidean)
  end
end

[:byte, :int32, :float32, :float64].each do |dtype|
before :each do
  # Integer dtypes are generated with scale 255, floating-point ones with 1.0
  # (presumably so that random integer matrices are not all zeros -- confirm).
  scale = [:byte, :int32, :int64].include?(dtype) ? 255 : 1.0
  @dataset = NMatrix.random([1000,128], dtype: dtype, scale: scale)
  @testset = NMatrix.random([100,128], dtype: dtype, scale: scale)
end

context "#nearest_neighbors" do
Expand Down
119 changes: 64 additions & 55 deletions src/ruby/spec/index_spec.rb
Expand Up @@ -29,61 +29,70 @@


describe Flann::Index do

before :each do
@dataset = NMatrix.random([1000,128])
@testset = NMatrix.random([100,128])
@index = Flann::Index.new(@dataset) do |t|
t[:algorithm] = :kdtree
t[:trees] = 4
end
@index.build!
end


context "#build!" do
it "builds a kdtree index with block parameters" do
# Empty: handled in :each, above
end
end


context "#nearest_neighbors" do
it "runs without error" do
@index.nearest_neighbors @testset, 5
[:byte, :float32, :float64].each do |dtype|
context dtype.inspect do
before :each do
scale = [:byte, :int32, :int64].include?(dtype) ? 255 : 1.0

@dataset = NMatrix.random([1000,128], dtype: dtype, scale: scale)
@testset = NMatrix.random([100,128], dtype: dtype, scale: scale)
@index = Flann::Index.new(@dataset) do |t|
t[:algorithm] = :kdtree
t[:trees] = 4
end
@index.build!
end


context "#build!" do
  it "builds a kdtree index with block parameters" do
    # Same integer-dtype list as the surrounding before-hook, so both agree
    # on which dtypes get the wider value range.
    scale = [:byte, :int32, :int64].include?(dtype) ? 255 : 1.0

    @dataset = NMatrix.random([1000,128], dtype: dtype, scale: scale)
    @testset = NMatrix.random([100,128], dtype: dtype, scale: scale)
    @index = Flann::Index.new(@dataset) do |t|
      t[:algorithm] = :kdtree
      t[:trees] = 4
    end
    @index.build!
  end
end


# Smoke test: queries the index held in @index with @testset and k = 5;
# the example passes as long as the call does not raise.
context "#nearest_neighbors" do
it "runs without error" do
@index.nearest_neighbors @testset, 5
end
end


context "#radius_search" do
  it "runs without error" do
    # Build the query with the dtype under test so that its buffer has the
    # same element type as the indexed dataset (the query's raw data pointer
    # is handed straight to the C search function).
    scale = [:byte, :int32, :int64].include?(dtype) ? 255 : 1.0
    query = NMatrix.random([1,128], dtype: dtype, scale: scale)
    @index.radius_search query, 0.4
  end
end


context "#save" do
  it "saves an index to a file which can be loaded again" do
    # Start from a clean slate so a leftover file cannot fake a successful save.
    FileUtils.rm("temp_index.save_file", :force => true)
    @index.save("temp_index.save_file")

    # File.exist? replaces the deprecated File.exists?, which was removed in Ruby 3.2.
    raise(IOError, "save failed") unless File.exist?("temp_index.save_file")

    post_index = Flann::Index.new(@dataset)
    post_index.load!("temp_index.save_file")
    FileUtils.rm("temp_index.save_file", :force => true)
  end
end


# Smoke test: calling free! on the index held in @index must not raise.
context "#free!" do
it "frees an index" do
@index.free!
end
end
end
end


context "#radius_search" do
it "runs without error" do
query = NMatrix.random([1,128])
@index.radius_search query, 0.4
end
end


context "#save" do
it "saves an index to a file which can be loaded again" do
FileUtils.rm("temp_index.save_file", :force => true)
@index.save("temp_index.save_file")

raise(IOError, "save failed") unless File.exists?("temp_index.save_file")

post_index = Flann::Index.new(@dataset)
post_index.load!("temp_index.save_file")
FileUtils.rm("temp_index.save_file", :force => true)
end
end


context "#free!" do
it "frees an index" do
@index.free!
end
end




end
1 change: 1 addition & 0 deletions src/ruby/spec/spec_helper.rb
Expand Up @@ -26,5 +26,6 @@
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

require 'rspec'
require 'rspec/longrun'

require "./lib/flann"

0 comments on commit 7694b11

Please sign in to comment.