Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Squash of dm-core ~> 0.10.2 work in progress.

  • Loading branch information...
commit 0b4f321451c2228214247906cf420d6d8ecb2b47 1 parent 38250f6
@shanna authored
Showing with 527 additions and 1,821 deletions.
  1. +2 −0  .gitignore
  2. +11 −0 Gemfile
  3. +11 −22 lib/dm-sphinx-adapter.rb
  4. +49 −194 lib/dm-sphinx-adapter/adapter.rb
  5. +0 −83 lib/dm-sphinx-adapter/attribute.rb
  6. +0 −19 lib/dm-sphinx-adapter/collection.rb
  7. +36 −0 lib/dm-sphinx-adapter/connection.rb
  8. +0 −38 lib/dm-sphinx-adapter/index.rb
  9. +28 −63 lib/dm-sphinx-adapter/query.rb
  10. +64 −0 lib/dm-sphinx-adapter/query/conditions.rb
  11. +36 −0 lib/dm-sphinx-adapter/query_factory.rb
  12. +0 −120 lib/dm-sphinx-adapter/resource.rb
  13. +19 −0 lib/dm-sphinx-adapter/search.rb
  14. +48 −0 lib/dm-sphinx-adapter/search/extended2.rb
  15. +42 −0 lib/dm-sphinx-adapter/search/filters.rb
  16. +16 −0 lib/dm-sphinx-adapter/search/mode.rb
  17. +48 −0 lib/dm-sphinx-adapter/search/statement.rb
  18. +0 −94 lib/dm-sphinx-adapter/xmlpipe2.rb
  19. +0 −28 lib/riddle.rb
  20. +0 −619 lib/riddle/client.rb
  21. +0 −53 lib/riddle/client/filter.rb
  22. +0 −65 lib/riddle/client/message.rb
  23. +0 −84 lib/riddle/client/response.rb
  24. +5 −18 test/files/sphinx.conf
  25. +26 −19 test/helper.rb
  26. +17 −55 test/test_adapter.rb
  27. +0 −36 test/test_attribute.rb
  28. +0 −26 test/test_collection.rb
  29. +0 −30 test/test_index.rb
  30. +0 −61 test/test_query.rb
  31. +0 −17 test/test_resource.rb
  32. +69 −0 test/test_search_extended2.rb
  33. +0 −77 test/test_xmlpipe2.rb
View
2  .gitignore
@@ -1,2 +1,4 @@
*.DS_Store
*.swp
+bin/*
+gems/*
View
11 Gemfile
@@ -0,0 +1,11 @@
+# vim: syntax=ruby
+bundle_path 'gems'
+disable_rubygems
+disable_system_gems
+
+gem 'dm-core', '~> 0.10.2'
+gem 'riddle', '~> 1.0.9'
+
+gem 'nokogiri', :only => :xmlpipe2
+gem 'shoulda', :only => :development
+
View
33 lib/dm-sphinx-adapter.rb
@@ -1,23 +1,12 @@
-require 'rubygems'
+require 'dm-core'
+require 'dm-sphinx-adapter/connection'
+
+# The adapter proper.
+require 'dm-sphinx-adapter/query_factory'
+require 'dm-sphinx-adapter/query'
+require 'dm-sphinx-adapter/query/conditions'
+require 'dm-sphinx-adapter/search'
+require 'dm-sphinx-adapter/search/extended2'
+require 'dm-sphinx-adapter/search/filters'
+require 'dm-sphinx-adapter/adapter'
-# TODO: Hide the shitload of dm-core warnings or at least try to?
-old_verbose, $VERBOSE = $VERBOSE, nil
- gem 'dm-core', '~> 0.9.8'
- require 'dm-core'
-$VERBOSE = old_verbose
-
-require 'pathname'
-lib = Pathname(__FILE__).dirname.expand_path
-dir = lib / 'dm-sphinx-adapter'
-
-# Bundled Riddle since the gem is very old and we don't need any of the config generation stuff.
-$:.unshift lib
-require 'riddle'
-
-# TODO: Require farms suck. Do something about it.
-require dir / 'adapter'
-require dir / 'attribute'
-require dir / 'collection'
-require dir / 'index'
-require dir / 'query'
-require dir / 'resource'
View
243 lib/dm-sphinx-adapter/adapter.rb
@@ -1,200 +1,55 @@
module DataMapper
- module Adapters
- module Sphinx
- # == Synopsis
- #
- # DataMapper uses URIs or a connection has to connect to your data-stores. In this case the sphinx search daemon
- # <tt>searchd</tt>.
- #
- # On its own this adapter will only return an array of document hashes when queried. The DataMapper library dm-more
- # however provides dm-is-searchable, a common interface to search one adapter and load documents from another. My
- # preference is to use this adapter in tandem with dm-is-searchable.
- #
- # Like all DataMapper adapters you can connect with a Hash or URI.
- #
- # A URI:
- # DataMapper.setup(:search, 'sphinx://localhost')
- #
- # The breakdown is:
- # "#{adapter}://#{host}:#{port}/#{config}"
- # - adapter Must be :sphinx
- # - host Hostname (default: localhost)
- # - port Optional port number (default: 3312)
- #
- # Alternatively supply a Hash:
- # DataMapper.setup(:search, {
- # :adapter => 'sphinx', # required
- # :host => 'localhost', # optional. Default: localhost
- # :port => 3312 # optional. Default: 3312
- # })
- class Adapter < AbstractAdapter
-
- # ==== See
- # * DataMapper::Adapters::AbstractAdapter
- #
- # ==== Parameters
- # uri_or_options<URI, DataObject::URI, Addressable::URI, String, Hash, Pathname>::
- # DataMapper uri or options hash.
- def initialize(name, uri_or_options)
- super # Set up defaults.
- @options = normalize_options(uri_or_options)
- end
-
- def create(resources) #:nodoc:
- 0
- end
-
- def delete(query) #:nodoc:
- 0
- end
-
- # Query your Sphinx repository and return all matching documents.
- #
- # ==== Notes
- #
- # These methods are public but normally called indirectly through DataMapper::Resource#get,
- # DataMapper::Resource#first or DataMapper::Resource#all.
- #
- # The document hashes returned are those from Riddle::Client.
- #
- # ==== Parameters
- # query<DataMapper::Query>:: The query object.
- #
- # ==== Returns
- # Array<Hash>:: An array of document hashes. <tt>[{:id => 1, ...}, {:id => 2, ...}]</tt>
- # Array<>:: An empty array if no documents match.
- def read_many(query)
- read(query)
- end
-
- # Query your Sphinx repository and return the first document matched.
- #
- # ==== Notes
- #
- # These methods are public but normally called indirectly through DataMapper::Resource#get,
- # DataMapper::Resource#first or DataMapper::Resource#all.
- #
- # ==== Parameters
- # query<DataMapper::Query>:: The query object.
- #
- # ==== Returns
- # Hash:: An document hash of the first document matched. <tt>{:id => 1, ...}</tt>
- # Nil:: If no documents match.
- def read_one(query)
- read(query).first
- end
-
- protected
- # List sphinx indexes to search.
- #
- # If no indexes are explicitly declared using DataMapper::Adapters::Sphinx::Resource then the default storage
- # name is used.
- #
- # ==== See
- # * DataMapper::Adapters::Sphinx::Resource::ClassMethods#sphinx_indexes
- #
- # ==== Parameters
- # model<DataMapper::Model>:: The DataMapper::Model.
- #
- # ==== Returns
- # Array<DataMapper::Adapters::Sphinx::Index>:: Index objects from the model.
- def indexes(query)
- indexes = query.model.sphinx_indexes(name) if query.model.respond_to?(:sphinx_indexes)
- if indexes.nil? or indexes.empty?
- indexes = [Index.new(query.model, query.model.storage_name(name))]
- end
- indexes
+ module Sphinx
+ class Adapter < DataMapper::Adapters::AbstractAdapter
+ #--
+ # TODO: Error.
+ # TODO: Wait for live indexing or someone to write a gateway/manager that will buffer and fake it?
+ # Steal Solr's restful api for bonus points and create dm-search-adapter for Sphinx, Solr etc.
+ def create(resources) #:nodoc:
+ 0
+ end
+
+ #--
+ # TODO: Allow updating of attributes.
+ def update(attributes, collection) #:nodoc:
+ 0
+ end
+
+ #--
+ # TODO: Error.
+ def delete(query) #:nodoc:
+ 0
+ end
+
+ def read(query)
+ with_connection do |client|
+      # TODO: Yuck. I can't really ditch the Search struct without having naming collisions in Query but it's causing
+ # some ugly and non obvious method chains here.
+ client.match_mode = query.search.search.slug
+ client.filters = query.search.filters.statement
+ client.limit = query.limit.to_i if query.limit
+ client.offset = query.offset.to_i if query.offset
+ # TODO: Ordering (this is where I would get classes with the Query order methods).
+
+ client.query(query.search.search.statement, query.model.storage_name, '').map do |record|
+ query.fields.zip(record[:attributes]).to_hash
end
-
- # Query sphinx for a list of document IDs.
- #
- # ==== Parameters
- # query<DataMapper::Query>:: The query object.
- #
- # ==== Returns
- # Array<Hash>:: An array of document hashes. <tt>[{:id => 1, ...}, {:id => 2, ...}]</tt>
- # Array<>:: An empty array if no documents match.
- def read(query)
- from = indexes(query).map{|index| index.name}.join(', ')
- search = Sphinx::Query.new(query).to_s
- client = Riddle::Client.new(@options[:host], @options[:port])
-
- # You can set some options that aren't set by the adapter.
- @options.except(:host, :port, :match_mode, :limit, :offset, :sort_mode, :sort_by).each do |k, v|
- client.method("#{k}=".to_sym).call(v) if client.respond_to?("#{k}=".to_sym)
- end
-
- client.match_mode = :extended
- client.filters = search_filters(query) # By attribute.
- client.limit = query.limit.to_i if query.limit
- client.offset = query.offset.to_i if query.offset
-
- if order = search_order(query)
- client.sort_mode = :extended
- client.sort_by = order
- end
-
- result = client.query(search, from)
- raise result[:error] unless result[:error].nil?
-
- DataMapper.logger.info(
- %q{Sphinx (%.3f): search '%s' in '%s' found %d documents} % [result[:time], search, from, result[:total]]
- )
- # TODO: Confusing, call it something other than collection?
- Collection.new(result)
- # result[:matches].map{|doc| doc[:id] = doc[:doc]; doc}
- end
-
-
- # Riddle search filters for attributes.
- def search_filters(query) #:nodoc:
- filters = []
- query.conditions.each do |operator, attribute, value|
- next unless attribute.kind_of? Sphinx::Attribute
- filters << case operator
- when :eql, :like then attribute.filter(value)
- when :not then attribute.filter(value, false)
- else raise NotImplementedError.new("Sphinx: Query attributes do not support the #{operator} operator")
- end
- end
- filters
- end
-
- # TODO: How do you tell the difference between the default query order and someone explicitly asking for
- # sorting by the primary key? I don't think you can at the moment.
- def search_order(query) #:nodoc:
- by = []
- query.order.each do |order|
- next unless order.property.kind_of? Sphinx::Attribute
- by << [order.property.field, order.direction].join(' ')
- end
- by.empty? ? nil : by.join(', ')
- end
-
- # Coerce +uri_or_options+ into a +Hash+ of options.
- #
- # ==== Parameters
- # uri_or_options<URI, DataObject::URI, Addressable::URI, String, Hash, Pathname>::
- # DataMapper uri or options hash.
- #
- # ==== Returns
- # Hash
- def normalize_options(uri_or_options)
- case uri_or_options
- when String, Addressable::URI then DataObjects::URI.parse(uri_or_options).attributes
- when DataObjects::URI then uri_or_options.attributes
- when Pathname then {:path => uri_or_options}
- else
- uri_or_options[:path] ||= uri_or_options.delete(:config) || uri_or_options.delete(:database)
- uri_or_options
- end
+ end
+ end
+
+ protected
+ def with_connection
+ begin
+ connection = Connection.new(@options[:host], @options[:port])
+ yield connection
+ ensure
+ connection.dispose unless connection.nil?
end
+ end
+ end # Adapter
+ end # Sphinx
- end # Adapter
- end # Sphinx
-
- # Keep magic in DataMapper#setup happy.
- SphinxAdapter = Sphinx::Adapter
- end # Adapters
+ Adapters::SphinxAdapter = DataMapper::Sphinx::Adapter
+ Adapters.const_added(:SphinxAdapter)
end # DataMapper
View
83 lib/dm-sphinx-adapter/attribute.rb
@@ -1,83 +0,0 @@
-require 'date'
-require 'time'
-
-module DataMapper
- module Adapters
- module Sphinx
-
- # Sphinx attribute definition.
- #
- # You must declare attributes as such if you want to use them for sorting or conditions.
- #
- # ==== Notes
- # The following primatives will be used as sql_attr_* types. Some liberty has been taken to accommodate for as
- # many DM primitives as possible.
- #
- # TrueClass:: sql_attr_bool
- # String:: sql_attr_str2ordinal
- # DataMapper::Types::Text:: sql_attr_str2ordinal
- # Float:: sql_attr_float
- # Integer:: sql_attr_uint
- # BigDecimal:: sql_attr_float
- # DateTime:: sql_attr_timestamp
- # Date:: sql_attr_timestamp
- # Time:: sql_attr_timestamp
- # DataMapper::Types::Serial:: sql_attr_uint
- class Attribute < Property
-
- # DataMapper types supported as Sphinx attributes.
- TYPES = [
- TrueClass, # sql_attr_bool
- String, # sql_attr_str2ordinal
- DataMapper::Types::Text, # sql_attr_str2ordinal
- Float, # sql_attr_float
- Integer, # sql_attr_uint
- BigDecimal, # sql_attr_float
- DateTime, # sql_attr_timestamp
- Date, # sql_attr_timestamp
- Time, # sql_attr_timestamp
- # Object,
- # Class,
- # DataMapper::Types::Discriminator,
- DataMapper::Types::Serial # sql_attr_uint
- ]
-
- # Create a riddle client filter from a value.
- #
- # ==== Parameters
- # value<Object>::
- # The filter value to typecast and include/exclude.
- #
- # inclusive<Boolean>::
- # Include or exclude results matching the filter value. Default: inclusive (true).
- #
- # ==== Returns
- # Riddle::Client::Filter::
- def filter(value, inclusive = true)
- # Riddle uses exclusive = false as the default which doesn't read well IMO. Nobody says "Yes I don't want
- # these values" you say "No I don't want these values".
- value = typecast(value)
- value = [value] unless value.quacks_like?([Array, Range])
- Riddle::Client::Filter.new(field, value, !inclusive)
- end
-
- # Typecasts the value into a sphinx primitive. Supports ranges or arrays of values.
- #
- # ==== Notes
- # Some loss of precision may occur when casting BigDecimal to Float.
- def typecast(value)
- if value.kind_of?(Range) then Range.new(typecast(value.first), typecast(value.last))
- elsif value.kind_of?(Array) then value.map{|v| typecast(v)}
- elsif primitive == BigDecimal then super(value).to_f
- elsif primitive == DateTime then Time.parse(super(value).to_s).to_i
- elsif primitive == Date then Time.parse(super(value).to_s).to_i
- elsif primitive == Time then super(value).to_i
- else
- super(value) # Good luck
- end
- end
-
- end # Attribute
- end # Sphinx
- end # Adapters
-end # DataMapper
View
19 lib/dm-sphinx-adapter/collection.rb
@@ -1,19 +0,0 @@
-module DataMapper
- module Adapters
- module Sphinx
- class Collection < Array
- attr_accessor :error, :time, :total, :words
-
- def initialize(result)
- # TODO: One liner that works in Ruby 1.x now #indexes is #keys?
- @error = result[:error]
- @time = result[:time]
- @total = result[:total]
- @words = result[:words]
- super result[:matches].map{|doc| doc[:id] = doc[:doc]; doc}
- end
-
- end
- end # Sphinx
- end # Adapters
-end # DataMapper
View
36 lib/dm-sphinx-adapter/connection.rb
@@ -0,0 +1,36 @@
+require 'riddle'
+
+module DataMapper
+ module Sphinx
+ class Connection < Riddle::Client
+ def initialize(host = 'localhost', port = '9312')
+ super
+ reset
+ end
+
+ def dispose
+ reset && true
+ end
+
+ def reset
+ super
+ @match_mode = :extended2
+ end
+
+ def query(*args)
+ result = super
+
+ # TODO: connection.logger.info(
+ # %q{'(%.3f): '%s' in '%s' found %d documents} % [result[:time], search, @index, result[:total]]
+ # )
+ # TODO: Connection.logger.warn(result[:warning]) unless result[:warning].blank?
+ # TODO: Connection.logger.error(result[:error]) unless result[:error].blank?
+ # TODO: Raise result[:error] also.
+ $stderr.puts 'WARNING: ' + result[:warning] unless result[:warnings].blank?
+ $stderr.puts 'ERROR: ' + result[:error] unless result[:error].blank?
+
+ result.fetch(:matches, [])
+ end
+ end # Connection
+ end # Sphinx
+end # DataMapper
View
38 lib/dm-sphinx-adapter/index.rb
@@ -1,38 +0,0 @@
-module DataMapper
- module Adapters
- module Sphinx
-
- # Sphinx index definition.
- class Index
- include Assertions
-
- # Options.
- attr_reader :model, :name, :options
-
- # ==== Parameters
- # model<DataMapper::Model>:: Your resources model.
- # name<Symbol, String>:: The index name.
- # options<Hash>:: Optional arguments.
- #
- # ==== Options
- # :delta<Boolean>::
- # Delta index. Delta indexes will be searched last when multiple indexes are defined for a
- # resource. Default is false.
- def initialize(model, name, options = {})
- assert_kind_of 'model', model, Model
- assert_kind_of 'name', name, Symbol, String
- assert_kind_of 'options', options, Hash
-
- @model = model
- @name = name.to_sym
- @delta = options.fetch(:delta, false)
- end
-
- # Is the index a delta index.
- def delta?
- !!@delta
- end
- end # Index
- end # Sphinx
- end # Adapters
-end # DataMapper
View
91 lib/dm-sphinx-adapter/query.rb
@@ -1,68 +1,33 @@
module DataMapper
- module Adapters
- module Sphinx
+ module Sphinx
+ # Extends DM::Query with the ability to cast itself as a DM::Sphinx::Search object.
+ class Query < DataMapper::Query
+ # The cast Search object.
+ attr_reader :search
- # Sphinx extended search query string from DataMapper query.
- class Query
- include Extlib::Assertions
+ #--
+ # TODO: Document extra :mode and :filters options.
+      # TODO: This still smells iffy.
+ def initialize(repository, model, options = {})
+ # The Query wouldn't pass validation if I didn't remove the extra arguments.
+ mode = options.delete(:mode)
+ filters = options.delete(:filters)
+ super
- # Initialize a new extended Sphinx query from a DataMapper::Query object.
- #
- # If the query has no conditions an '' empty string will be generated possibly triggering Sphinx's full scan
- # mode.
- #
- # ==== See
- # * http://www.sphinxsearch.com/doc.html#searching
- # * http://www.sphinxsearch.com/doc.html#conf-docinfo
- # * http://www.sphinxsearch.com/doc.html#extended-syntax
- #
- # ==== Raises
- # NotImplementedError:: DataMapper operators that can't be expressed in the extended sphinx query syntax.
- #
- # ==== Parameters
- # query<DataMapper::Query>:: DataMapper query object.
- def initialize(query)
- assert_kind_of 'query', query, DataMapper::Query
- @query = []
-
- if query.conditions.empty?
- @query << ''
- else
- query.conditions.each do |operator, property, value|
- next if property.kind_of? Sphinx::Attribute # Filters are added elsewhere.
- normalized = normalize_value(value)
- field = property.field(query.repository.name) unless operator == :raw
- @query << case operator
- when :eql, :like then '@%s "%s"' % [field.to_s, normalized.join(' ')]
- when :not then '@%s -"%s"' % [field.to_s, normalized.join(' ')]
- when :in then '@%s (%s)' % [field.to_s, normalized.map{|v| %{"#{v}"}}.join(' | ')]
- when :raw then "#{property}"
- else raise NotImplementedError.new("Sphinx: Query fields do not support the #{operator} operator")
- end
- end
- end
- end
-
- # ==== Returns
- # String:: The extended sphinx query string.
- def to_s
- @query.join(' ')
+ filters = Search::Filters.new(self.dup.clear.update(filters || {}))
+ search = case mode
+ when :extended2, nil then Search::Extended2.new(self)
+ # TODO: Modes.
+ # when :extended
+ # when :all
+ # when :any
+ # when :phrase
+ # when :boolean
+ else raise ArgumentError, "+options[:mode]+ used an unknown mode #{mode.inspect}."
end
-
- protected
- # Normalize and escape DataMapper query value(s) to escaped sphinx query values.
- #
- # ==== Parameters
- # value<String, Array>:: The query value.
- #
- # ==== Returns
- # Array:: An array of one or more query values.
- def normalize_value(value)
- [value].flatten.map do |v|
- v.to_s.gsub(/[\(\)\|\-!@~"&\/]/){|char| "\\#{char}"}
- end
- end
- end # Query
- end # Sphinx
- end # Adapters
+ @search = Search.new(search, filters)
+ end
+ end # Query
+ end # Sphinx
end # DataMapper
+
View
64 lib/dm-sphinx-adapter/query/conditions.rb
@@ -0,0 +1,64 @@
+module DataMapper
+ module Sphinx
+ class Query
+ #--
+ # Yeah I'm just hacking this stuff in. Fingers crossed you'll be able to legitimately add operators in the
+ # future.
+ module Conditions
+ module SizedComparison
+ def valid?
+ value.kind_of?(Array) && value.size == 2 && value.last.kind_of?(Integer)
+ end
+
+ def typecast(value)
+ [super(value[0]), value[1]]
+ end
+ end
+
+ #--
+ # subject.position => ['hello world', 50] # @subject[50] "hello world"
+ class PositionComparison < DataMapper::Query::Conditions::AbstractComparison
+ include SizedComparison
+ slug :position
+ end
+
+ #--
+ # subject.phrase => 'hello world' # @subject "hello world"
+ class PhraseComparison < DataMapper::Query::Conditions::AbstractComparison
+ slug :phrase
+ end
+
+ #--
+ # subject.proximity => ['hello world', 10] # @subject "hello world"~10
+ class ProximityComparison < DataMapper::Query::Conditions::AbstractComparison
+ include SizedComparison
+ slug :proximity
+ end
+
+ #--
+ # subject.quorum => ['hello world', 10] # @subject "hello world"/10
+ class QuorumComparison < DataMapper::Query::Conditions::AbstractComparison
+ include SizedComparison
+ slug :quorum
+ end
+
+ #--
+ # subject.exact => 'hello world' # @subject ="hello world"
+ class ExactComparison < DataMapper::Query::Conditions::AbstractComparison
+ slug :exact
+ end
+ end # Conditions
+ end # Query
+ end # Sphinx
+end # DataMapper
+
+# core_ext/symbol.rb
+class Symbol
+ [:position, :phrase, :proximity, :quorum, :exact].each do |sym|
+ class_eval <<-RUBY, __FILE__, __LINE__ + 1
+ def #{sym}
+ DataMapper::Query::Operator.new(self, #{sym.inspect})
+ end
+ RUBY
+ end
+end # class Symbol
View
36 lib/dm-sphinx-adapter/query_factory.rb
@@ -0,0 +1,36 @@
+#--
+# TODO: Ask about this monkey patch or alternatively I was thinking repository.create_query(*args) which would in turn
+# call the same method on the adapter. The command stuff in data objects works the same way so I don't see a good
+# argument against it other than the extra dispatch would be slightly slower.
+module DataMapper
+ class Query
+ extend Chainable
+
+ chainable do
+ def self.new(*args, &block)
+ super
+ end
+ end
+ end # Query
+end # DataMapper
+
+# Hijack the DataMapper::Query constructor allowing us to return a subclassed sphinx query object.
+module DataMapper
+ module Sphinx
+ module QueryFactory
+
+ def new(repository, *args)
+ if repository.adapter.is_a?(Sphinx::Adapter) && self == DataMapper::Query
+ Sphinx::Query.new(repository, *args)
+ else
+ super
+ end
+ end
+ end # QueryFactory
+ end # Sphinx
+
+ class Query
+ extend Sphinx::QueryFactory
+ end
+end # DataMapper
+
View
120 lib/dm-sphinx-adapter/resource.rb
@@ -1,120 +0,0 @@
-module DataMapper
- module Adapters
- module Sphinx
-
- # Declare Sphinx indexes and attributes in your resource.
- #
- # model Items
- # include DataMapper::SphinxResource
- #
- # # .. normal properties and such for :default
- #
- # repository(:search) do
- # # Query some_index, some_index_delta in that order.
- # index :some_index
- # index :some_index_delta, :delta => true
- #
- # # Sortable by some attributes.
- # attribute :updated_at, DateTime # sql_attr_timestamp
- # attribute :age, Integer # sql_attr_uint
- # attribute :deleted, Boolean # sql_attr_bool
- # end
- # end
- module Resource
-
- def self.append_inclusions(*inclusions)
- extra_inclusions.concat inclusions
- true
- end
-
- def self.extra_inclusions
- @extra_inclusions ||= []
- end
-
- def self.included(model) #:nodoc:
- model.send(:include, DataMapper::Resource)
- model.extend ClassMethods if defined?(ClassMethods)
- extra_inclusions.each{|inclusion| model.send(:include, inclusion)}
- end
-
- module ClassMethods
- def self.extended(model) #:nodoc:
- model.instance_variable_set(:@sphinx_indexes, {})
- model.instance_variable_set(:@sphinx_attributes, {})
- end
-
- # Defines a sphinx index on the resource.
- #
- # Indexes are naturally ordered, with delta indexes at the end of the list so that duplicate document IDs in
- # delta indexes override your main indexes.
- #
- # ==== See
- # * DataMapper::Adapters::Sphinx::Index
- #
- # ==== Parameters
- # name<Symbol>:: The name of a sphinx index to search for this resource.
- # options<Hash>:: A hash of available index options.
- def index(name, options = {})
- index = Index.new(self, name, options)
- indexes = sphinx_indexes(repository_name)
- indexes << index
-
- # TODO: I'm such a Ruby nub. In the meantime I've gone back to my Perl roots.
- # This is a Schwartzian transform to sort delta indexes to the bottom and natural sort by name.
- mapped = indexes.map{|i| [(i.delta? ? 1 : 0), i.name, i]}
- sorted = mapped.sort{|a, b| a[0] <=> b[0] || a[1] <=> b[1]}
- indexes.replace(sorted.map{|i| i[2]})
-
- index
- end
-
- # List of declared sphinx indexes for this model.
- #
- # ==== Returns
- # Array<DataMapper::Adapters::Sphinx::Index>
- def sphinx_indexes(repository_name = default_repository_name)
- @sphinx_indexes[repository_name] ||= []
- end
-
- # Defines a sphinx attribute on the resource.
- #
- # ==== See
- # DataMapper::Adapters::Sphinx::Attribute
- #
- # ==== Parameters
- # name<Symbol>:: The name of a sphinx attribute to order/restrict by for this resource.
- # type<Class>:: The type to define this attribute as.
- # options<Hash>:: An optional hash of attribute options.
- def attribute(name, type, options = {})
- # Attributes are just properties without a getter/setter in the model.
- # This keeps DataMapper::Query happy when building queries.
- attribute = Sphinx::Attribute.new(self, name, type, options)
- properties(repository_name)[attribute.name] = attribute
- attribute
- end
-
- # List of declared sphinx attributes for this model.
- #
- # ==== Returns
- # Array<DataMapper::Adapters::Sphinx::Attribute>
- def sphinx_attributes(repository_name = default_repository_name)
- properties(repository_name).find_all{|p| p.kind_of? Sphinx::Attribute}
- end
-
- # List of properties (aka sphinx fields).
- #
- # This list will be the inverse of properties not declared as attributes.
- # ==== Returns
- def sphinx_fields(repository_name = default_repository_name)
- properties(repository_name).reject{|p| p.kind_of? Sphinx::Attribute}
- end
-
- end # ClassMethods
- end # Resource
- end # Sphinx
- end # Adapters
-
- # Follow DM naming convention.
- SphinxResource = Adapters::Sphinx::Resource
-end # DataMapper
-
View
19 lib/dm-sphinx-adapter/search.rb
@@ -0,0 +1,19 @@
+module DataMapper
+ module Sphinx
+ class Search
+ include Extlib::Assertions
+ attr_reader :search, :filters
+
+ def initialize(search, filters)
+ assert_kind_of 'search', search, Search::Statement # TODO: Add Search::Mode to subclass.
+ assert_kind_of 'filters', filters, Search::Filters
+ @search, @filters = search, filters
+ end
+
+ def native?
+ search.native? && filters.native?
+ end
+ end # Search
+ end # Sphinx
+end # DataMapper
+
View
48 lib/dm-sphinx-adapter/search/extended2.rb
@@ -0,0 +1,48 @@
+require 'lib/dm-sphinx-adapter/search/mode'
+
+module DataMapper
+ module Sphinx
+ class Search
+ class Extended2 < Mode
+ include DataMapper::Sphinx::Query::Conditions
+
+ def slug
+ :extended2
+ end
+
+ protected
+ def operation_statement(operation)
+ expression = operation.map{|op| condition_statement(op)}.compact
+ return if expression.empty?
+
+ case operation
+ when NotOperation then ['!(', expression.join, ')'].join
+ when AndOperation then ['(', expression.join(' & '), ')'].join
+ when OrOperation then ['(', expression.join(' | '), ')'].join
+ end
+ end
+
+ #--
+      # TODO: I really need a rule here about when a phrase is used or not.
+ def comparison_statement(comparison)
+ field = comparison.subject.field
+ value = comparison.value.dup
+ case comparison
+ when EqualToComparison then '@%s %s' % [field, quote(value)]
+ when InclusionComparison then '@%s (%s)' % [field, value.map{|v| quote(v)}.join('|')]
+ when PositionComparison then '@%s[%d] "%s"' % [field, value[1], quote(value[0])]
+ when PhraseComparison then '@%s "%s"' % [field, quote(value)]
+ when ProximityComparison then '@%s "%s"~%d' % [field, quote(value[0]), value[1]]
+ when QuorumComparison then '@%s "%s"/%d' % [field, quote(value[0]), value[1]]
+ when ExactComparison then '@%s ="%s"' % [field, quote(value)]
+ else fail_native("Comparison #{comparison.slug}'.") && return
+ end
+ end
+
+ def quote(value)
+ value.to_s.gsub(/[\(\)\|\-!@~"&\/]/){|char| "\\#{char}"}
+ end
+ end # Extended2
+ end # Search
+ end # Sphinx
+end # DataMapper
View
42 lib/dm-sphinx-adapter/search/filters.rb
@@ -0,0 +1,42 @@
+require 'lib/dm-sphinx-adapter/search/statement'
+
+module DataMapper
+ module Sphinx
+ class Search
+ class Filters < Statement
+ include DataMapper::Query::Conditions
+ include Extlib::Assertions
+
+ def statement
+ condition_statement(@query.conditions)
+ end
+
+ protected
+ def operation_statement(operation)
+ expression = operation.map{|op| condition_statement(op)}.compact
+ return if expression.empty?
+
+ case operation
+ when AndOperation then expression
+ when NotOperation
+ expression[2] = false # Ick?
+ expression
+ else # TODO: fail_native
+ end
+ end
+
+ def comparison_statement(comparison)
+ field = comparison.subject.field
+ value = comparison.value.dup
+ statement = case comparison
+ when EqualToComparison then [field, value, true]
+ # gt, lt and whatever else filters support.
+ else fail_native("Comparison #{comparison.slug}'.") && return
+ end
+
+ statement
+ end
+ end
+ end # Search
+ end # Sphinx
+end # DataMapper
View
16 lib/dm-sphinx-adapter/search/mode.rb
@@ -0,0 +1,16 @@
+require 'dm-sphinx-adapter/search/statement'
+
+module DataMapper
+ module Sphinx
+ class Search
+ # TODO: Move the modes factory from Sphinx::Query to here.
+ class Mode < Statement
+
+ # Symbol for each mode.
+ def slug
+ raise NotImplementedError
+ end
+ end # Mode
+ end # Search
+ end # Sphinx
+end # DataMapper
View
48 lib/dm-sphinx-adapter/search/statement.rb
@@ -0,0 +1,48 @@
+module DataMapper
+ module Sphinx
+ class Search
+ #--
+ # TODO: Not sold on the name at all.
+ class Statement
+ include DataMapper::Query::Conditions
+ include Extlib::Assertions
+
+ def initialize(query)
+ assert_kind_of 'query', query, DataMapper::Query
+ @query, @native = query, []
+ end
+
+ def native?
+ @native.empty?
+ end
+
+ def statement
+ condition_statement(@query.conditions) || ''
+ end
+
+ protected
+ def condition_statement(conditions)
+ case conditions
+ when AbstractOperation then operation_statement(conditions)
+ when AbstractComparison then comparison_statement(conditions)
+ end
+ end
+
+ # Abstract.
+ def operation_statement(operation)
+ raise NotImplementedError
+ end
+
+ # Abstract.
+ def comparison_statement(comparison)
+ raise NotImplementedError
+ end
+
+ def fail_native(why)
+ @native << why
+ end
+ end # Statement
+ end # Search
+ end # Sphinx
+end # DataMapper
+
View
94 lib/dm-sphinx-adapter/xmlpipe2.rb
@@ -1,94 +0,0 @@
-module DataMapper
- module Adapters
- module Sphinx
- require 'builder'
-
-
- # Sphinx xmlpipe2.
- #
- # Full text search data from any DM adapter without having to implement new Sphinx data sources drivers.
- #
- # ==== See
- # * http://www.sphinxsearch.com/docs/current.html#xmlpipe2
- #
- #--
- # TODO:
- # * Synopsis.
- module XmlPipe2
- def self.included(model)
- model.extend ClassMethods if defined?(ClassMethods)
- end
-
- module ClassMethods
-
- # Write a Sphinx xmlpipe2 XML stream to $stdout.
- #
- # ==== Parameters
- # source<String>:: The name of the repository to stream from.
- # destination<String>:: The name of the repository to stream to (contains your sphinx definition).
- # query<Hash>:: The conditions with which to find the records to stream.
- #--
- # TODO:
- # * in_memory_adapter doesn't call the super constructor so there is no field_naming_convention set in
- # DataMapper 0.9.10. Submit a patch or live with rescue and field.name clause?
- # * Keys that aren't called .id?
- # * Composite keys?
- # * Method for schema and documents.
- # * Less poking round in the internals of the :default adapter if I can?
- # * Destination should always be a dm-sphinx-adapter adapter.
- # * Optional schema since it overrides any schema you might define in the sphinx configuration.
- # * Schema default values from DM property default values.
- def xmlpipe2(source, destination = :default, query = {})
- builder = Builder::XmlMarkup.new(:target => $stdout)
- builder.instruct!
- builder.sphinx(:docset, :'xmlns:sphinx' => 'sphinx') do
-
- builder.sphinx(:schema) do
- sphinx_fields(destination).each do |field|
- builder.sphinx(:field, :name => (field.field(destination) rescue field.name))
- end
- sphinx_attributes(destination).each do |attr|
- builder.sphinx(:attr, {
- :name => (attr.field(destination) rescue attr.name),
- :type => xmlpipe2_type(attr.primitive)
- })
- end
- end
-
- all(query.merge(:repository => repository(source))).map do |resource|
- builder.sphinx(:document, :id => resource.id) do |document|
- properties(destination).each do |property|
- # TODO: Pretty sure this isn't the correct way to get and typecast.
- builder.tag!((property.field(destination) rescue property.name)) do |field|
- field.cdata!(property.typecast(property.get(resource)))
- end
- end
- end
- end
- end
- end
-
- private
- def xmlpipe2_type(primitive) #:nodoc:
- {
- Integer => 'int',
- Float => 'float',
- BigDecimal => 'float',
- DateTime => 'timestamp',
- Date => 'timestamp',
- Time => 'timestamp',
- TrueClass => 'bool',
- String => 'str2ordinal',
- DataMapper::Types::Text => 'str2ordinal'
- }[primitive]
- end
-
- end # ClassMethods
- end # XmlPipe2
-
- # Include XmlPipe2 in all DM::A::SphinxResource models when you require this file.
- Resource.append_inclusions XmlPipe2
- end # Sphinx
- end # Adapters
-end # DataMapper
-
View
28 lib/riddle.rb
@@ -1,28 +0,0 @@
-require 'socket'
-require 'timeout'
-
-require 'riddle/client'
-
-module Riddle #:nodoc:
- class ConnectionError < StandardError #:nodoc:
- end
-
- module Version #:nodoc:
- Major = 0
- Minor = 9
- Tiny = 8
- # Revision number for RubyForge's sake, taken from what Sphinx
- # outputs to the command line.
- Rev = 1533
- # Release number to mark my own fixes, beyond feature parity with
- # Sphinx itself.
- Release = 2
-
- String = [Major, Minor, Tiny].join('.')
- GemVersion = [Major, Minor, Tiny, Rev, Release].join('.')
- end
-
- def self.escape(string)
- string.gsub(/[\(\)\|\-!@~"&\/]/) { |char| "\\#{char}" }
- end
-end
View
619 lib/riddle/client.rb
@@ -1,619 +0,0 @@
-require 'riddle/client/filter'
-require 'riddle/client/message'
-require 'riddle/client/response'
-
-module Riddle
- class VersionError < StandardError; end
- class ResponseError < StandardError; end
-
- # This class was heavily based on the existing Client API by Dmytro Shteflyuk
- # and Alexy Kovyrin. Their code worked fine, I just wanted something a bit
- # more Ruby-ish (ie. lowercase and underscored method names). I also have
- # used a few helper classes, just to neaten things up.
- #
- # Feel free to use it wherever. Send bug reports, patches, comments and
- # suggestions to pat at freelancing-gods dot com.
- #
- # Most properties of the client are accessible through attribute accessors,
- # and where relevant use symboles instead of the long constants common in
- # other clients.
- # Some examples:
- #
- # client.sort_mode = :extended
- # client.sort_by = "birthday DESC"
- # client.match_mode = :extended
- #
- # To add a filter, you will need to create a Filter object:
- #
- # client.filters << Riddle::Client::Filter.new("birthday",
- # Time.at(1975, 1, 1).to_i..Time.at(1985, 1, 1).to_i, false)
- #
- class Client
- Commands = {
- :search => 0, # SEARCHD_COMMAND_SEARCH
- :excerpt => 1, # SEARCHD_COMMAND_EXCERPT
- :update => 2, # SEARCHD_COMMAND_UPDATE
- :keywords => 3 # SEARCHD_COMMAND_KEYWORDS
- }
-
- Versions = {
- :search => 0x113, # VER_COMMAND_SEARCH
- :excerpt => 0x100, # VER_COMMAND_EXCERPT
- :update => 0x101, # VER_COMMAND_UPDATE
- :keywords => 0x100 # VER_COMMAND_KEYWORDS
- }
-
- Statuses = {
- :ok => 0, # SEARCHD_OK
- :error => 1, # SEARCHD_ERROR
- :retry => 2, # SEARCHD_RETRY
- :warning => 3 # SEARCHD_WARNING
- }
-
- MatchModes = {
- :all => 0, # SPH_MATCH_ALL
- :any => 1, # SPH_MATCH_ANY
- :phrase => 2, # SPH_MATCH_PHRASE
- :boolean => 3, # SPH_MATCH_BOOLEAN
- :extended => 4, # SPH_MATCH_EXTENDED
- :fullscan => 5, # SPH_MATCH_FULLSCAN
- :extended2 => 6 # SPH_MATCH_EXTENDED2
- }
-
- RankModes = {
- :proximity_bm25 => 0, # SPH_RANK_PROXIMITY_BM25
- :bm25 => 1, # SPH_RANK_BM25
- :none => 2, # SPH_RANK_NONE
- :wordcount => 3 # SPH_RANK_WORDCOUNT
- }
-
- SortModes = {
- :relevance => 0, # SPH_SORT_RELEVANCE
- :attr_desc => 1, # SPH_SORT_ATTR_DESC
- :attr_asc => 2, # SPH_SORT_ATTR_ASC
- :time_segments => 3, # SPH_SORT_TIME_SEGMENTS
- :extended => 4, # SPH_SORT_EXTENDED
- :expr => 5 # SPH_SORT_EXPR
- }
-
- AttributeTypes = {
- :integer => 1, # SPH_ATTR_INTEGER
- :timestamp => 2, # SPH_ATTR_TIMESTAMP
- :ordinal => 3, # SPH_ATTR_ORDINAL
- :bool => 4, # SPH_ATTR_BOOL
- :float => 5, # SPH_ATTR_FLOAT
- :multi => 0x40000000 # SPH_ATTR_MULTI
- }
-
- GroupFunctions = {
- :day => 0, # SPH_GROUPBY_DAY
- :week => 1, # SPH_GROUPBY_WEEK
- :month => 2, # SPH_GROUPBY_MONTH
- :year => 3, # SPH_GROUPBY_YEAR
- :attr => 4, # SPH_GROUPBY_ATTR
- :attrpair => 5 # SPH_GROUPBY_ATTRPAIR
- }
-
- FilterTypes = {
- :values => 0, # SPH_FILTER_VALUES
- :range => 1, # SPH_FILTER_RANGE
- :float_range => 2 # SPH_FILTER_FLOATRANGE
- }
-
- attr_accessor :server, :port, :offset, :limit, :max_matches,
- :match_mode, :sort_mode, :sort_by, :weights, :id_range, :filters,
- :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
- :retry_count, :retry_delay, :anchor, :index_weights, :rank_mode,
- :max_query_time, :field_weights, :timeout
- attr_reader :queue
-
- # Can instantiate with a specific server and port - otherwise it assumes
- # defaults of localhost and 3312 respectively. All other settings can be
- # accessed and changed via the attribute accessors.
- def initialize(server=nil, port=nil)
- @server = server || "localhost"
- @port = port || 3312
-
- reset
-
- @queue = []
- end
-
- # Reset attributes and settings to defaults.
- def reset
- # defaults
- @offset = 0
- @limit = 20
- @max_matches = 1000
- @match_mode = :all
- @sort_mode = :relevance
- @sort_by = ''
- @weights = []
- @id_range = 0..0
- @filters = []
- @group_by = ''
- @group_function = :day
- @group_clause = '@group desc'
- @group_distinct = ''
- @cut_off = 0
- @retry_count = 0
- @retry_delay = 0
- @anchor = {}
- # string keys are index names, integer values are weightings
- @index_weights = {}
- @rank_mode = :proximity_bm25
- @max_query_time = 0
- # string keys are field names, integer values are weightings
- @field_weights = {}
- @timeout = 0
- end
-
- # Set the geo-anchor point - with the names of the attributes that contain
- # the latitude and longitude (in radians), and the reference position.
- # Note that for geocoding to work properly, you must also set
- # match_mode to :extended. To sort results by distance, you will
- # need to set sort_mode to '@geodist asc' for example. Sphinx
- # expects latitude and longitude to be returned from you SQL source
- # in radians.
- #
- # Example:
- # client.set_anchor('lat', -0.6591741, 'long', 2.530770)
- #
- def set_anchor(lat_attr, lat, long_attr, long)
- @anchor = {
- :latitude_attribute => lat_attr,
- :latitude => lat,
- :longitude_attribute => long_attr,
- :longitude => long
- }
- end
-
- # Append a query to the queue. This uses the same parameters as the query
- # method.
- def append_query(search, index = '*', comments = '')
- @queue << query_message(search, index, comments)
- end
-
- # Run all the queries currently in the queue. This will return an array of
- # results hashes.
- def run
- response = Response.new request(:search, @queue)
-
- results = @queue.collect do
- result = {
- :matches => [],
- :fields => [],
- :attributes => {},
- :attribute_names => [],
- :words => {}
- }
-
- result[:status] = response.next_int
- case result[:status]
- when Statuses[:warning]
- result[:warning] = response.next
- when Statuses[:error]
- result[:error] = response.next
- next result
- end
-
- result[:fields] = response.next_array
-
- attributes = response.next_int
- for i in 0...attributes
- attribute_name = response.next
- type = response.next_int
-
- result[:attributes][attribute_name] = type
- result[:attribute_names] << attribute_name
- end
-
- matches = response.next_int
- is_64_bit = response.next_int
- for i in 0...matches
- doc = is_64_bit > 0 ? response.next_64bit_int : response.next_int
- weight = response.next_int
-
- result[:matches] << {:doc => doc, :weight => weight, :index => i, :attributes => {}}
- result[:attribute_names].each do |attr|
- result[:matches].last[:attributes][attr] = attribute_from_type(
- result[:attributes][attr], response
- )
- end
- end
-
- result[:total] = response.next_int.to_i || 0
- result[:total_found] = response.next_int.to_i || 0
- result[:time] = ('%.3f' % (response.next_int / 1000.0)).to_f || 0.0
-
- words = response.next_int
- for i in 0...words
- word = response.next
- docs = response.next_int
- hits = response.next_int
- result[:words][word] = {:docs => docs, :hits => hits}
- end
-
- result
- end
-
- @queue.clear
- results
- end
-
- # Query the Sphinx daemon - defaulting to all indexes, but you can specify
- # a specific one if you wish. The search parameter should be a string
- # following Sphinx's expectations.
- #
- # The object returned from this method is a hash with the following keys:
- #
- # * :matches
- # * :fields
- # * :attributes
- # * :attribute_names
- # * :words
- # * :total
- # * :total_found
- # * :time
- # * :status
- # * :warning (if appropriate)
- # * :error (if appropriate)
- #
- # The key <tt>:matches</tt> returns an array of hashes - the actual search
- # results. Each hash has the document id (<tt>:doc</tt>), the result
- # weighting (<tt>:weight</tt>), and a hash of the attributes for the
- # document (<tt>:attributes</tt>).
- #
- # The <tt>:fields</tt> and <tt>:attribute_names</tt> keys return list of
- # fields and attributes for the documents. The key <tt>:attributes</tt>
- # will return a hash of attribute name and type pairs, and <tt>:words</tt>
- # returns a hash of hashes representing the words from the search, with the
- # number of documents and hits for each, along the lines of:
- #
- # results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
- #
- # <tt>:total</tt>, <tt>:total_found</tt> and <tt>:time</tt> return the
- # number of matches available, the total number of matches (which may be
- # greater than the maximum available, depending on the number of matches
- # and your sphinx configuration), and the time in milliseconds that the
- # query took to run.
- #
- # <tt>:status</tt> is the error code for the query - and if there was a
- # related warning, it will be under the <tt>:warning</tt> key. Fatal errors
- # will be described under <tt>:error</tt>.
- #
- def query(search, index = '*', comments = '')
- @queue << query_message(search, index, comments)
- self.run.first
- end
-
- # Build excerpts from search terms (the +words+) and the text of documents. Excerpts are bodies of text that have the +words+ highlighted.
- # They may also be abbreviated to fit within a word limit.
- #
- # As part of the options hash, you will need to
- # define:
- # * :docs
- # * :words
- # * :index
- #
- # Optional settings include:
- # * :before_match (defaults to <span class="match">)
- # * :after_match (defaults to </span>)
- # * :chunk_separator (defaults to ' &#8230; ' - which is an HTML ellipsis)
- # * :limit (defaults to 256)
- # * :around (defaults to 5)
- # * :exact_phrase (defaults to false)
- # * :single_passage (defaults to false)
- #
- # The defaults differ from the official PHP client, as I've opted for
- # semantic HTML markup.
- #
- # Example:
- #
- # client.excerpts(:docs => ["Pat Allan, Pat Cash"], :words => 'Pat', :index => 'pats')
- # #=> ["<span class=\"match\">Pat</span> Allan, <span class=\"match\">Pat</span> Cash"]
- #
- # lorem_lipsum = "Lorem ipsum dolor..."
- #
- # client.excerpts(:docs => ["Pat Allan, #{lorem_lipsum} Pat Cash"], :words => 'Pat', :index => 'pats')
- # #=> ["<span class=\"match\">Pat</span> Allan, Lorem ipsum dolor sit amet, consectetur adipisicing
- # elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua &#8230; . Excepteur
- # sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est
- # laborum. <span class=\"match\">Pat</span> Cash"]
- #
- # Workflow:
- #
- # Excerpt creation is completely isolated from searching the index. The nominated index is only used to
- # discover encoding and charset information.
- #
- # Therefore, the workflow goes:
- #
- # 1. Do the sphinx query.
- # 2. Fetch the documents found by sphinx from their repositories.
- # 3. Pass the documents' text to +excerpts+ for marking up of matched terms.
- #
- def excerpts(options = {})
- options[:index] ||= '*'
- options[:before_match] ||= '<span class="match">'
- options[:after_match] ||= '</span>'
- options[:chunk_separator] ||= ' &#8230; ' # ellipsis
- options[:limit] ||= 256
- options[:around] ||= 5
- options[:exact_phrase] ||= false
- options[:single_passage] ||= false
-
- response = Response.new request(:excerpt, excerpts_message(options))
-
- options[:docs].collect { response.next }
- end
-
- # Update attributes - first parameter is the relevant index, second is an
- # array of attributes to be updated, and the third is a hash, where the
- # keys are the document ids, and the values are arrays with the attribute
- # values - in the same order as the second parameter.
- #
- # Example:
- #
- # client.update('people', ['birthday'], {1 => [Time.at(1982, 20, 8).to_i]})
- #
- def update(index, attributes, values_by_doc)
- response = Response.new request(
- :update,
- update_message(index, attributes, values_by_doc)
- )
-
- response.next_int
- end
-
- # Generates a keyword list for a given query. Each keyword is represented
- # by a hash, with keys :tokenised and :normalised. If return_hits is set to
- # true it will also report on the number of hits and documents for each
- # keyword (see :hits and :docs keys respectively).
- def keywords(query, index, return_hits = false)
- response = Response.new request(
- :keywords,
- keywords_message(query, index, return_hits)
- )
-
- (0...response.next_int).collect do
- hash = {}
- hash[:tokenised] = response.next
- hash[:normalised] = response.next
-
- if return_hits
- hash[:docs] = response.next_int
- hash[:hits] = response.next_int
- end
-
- hash
- end
- end
-
- private
-
- # Connects to the Sphinx daemon, and yields a socket to use. The socket is
- # closed at the end of the block.
- def connect(&block)
- socket = nil
- if @timeout == 0
- socket = initialise_connection
- else
- begin
- Timeout.timeout(@timeout) { socket = initialise_connection }
- rescue Timeout::Error
- raise Riddle::ConnectionError,
- "Connection to #{@server} on #{@port} timed out after #{@timeout} seconds"
- end
- end
-
- begin
- yield socket
- ensure
- socket.close
- end
- end
-
- def initialise_connection
- socket = TCPSocket.new @server, @port
-
- # Checking version
- version = socket.recv(4).unpack('N*').first
- if version < 1
- socket.close
- raise VersionError, "Can only connect to searchd version 1.0 or better, not version #{version}"
- end
-
- # Send version
- socket.send [1].pack('N'), 0
-
- socket
- end
-
- # Send a collection of messages, for a command type (eg, search, excerpts,
- # update), to the Sphinx daemon.
- def request(command, messages)
- response = ""
- status = -1
- version = 0
- length = 0
- message = Array(messages).join("")
-
- connect do |socket|
- case command
- when :search
- # Message length is +4 to account for the following count value for
- # the number of messages (well, that's what I'm assuming).
- socket.send [
- Commands[command], Versions[command],
- 4+message.length, messages.length
- ].pack("nnNN") + message, 0
- else
- socket.send [
- Commands[command], Versions[command], message.length
- ].pack("nnN") + message, 0
- end
-
- header = socket.recv(8)
- status, version, length = header.unpack('n2N')
-
- while response.length < length
- part = socket.recv(length - response.length)
- response << part if part
- end
- end
-
- if response.empty? || response.length != length
- raise ResponseError, "No response from searchd (status: #{status}, version: #{version})"
- end
-
- case status
- when Statuses[:ok]
- if version < Versions[command]
- puts format("searchd command v.%d.%d older than client (v.%d.%d)",
- version >> 8, version & 0xff,
- Versions[command] >> 8, Versions[command] & 0xff)
- end
- response
- when Statuses[:warning]
- length = response[0, 4].unpack('N*').first
- puts response[4, length]
- response[4 + length, response.length - 4 - length]
- when Statuses[:error], Statuses[:retry]
- raise ResponseError, "searchd error (status: #{status}): #{response[4, response.length - 4]}"
- else
- raise ResponseError, "Unknown searchd error (status: #{status})"
- end
- end
-
- # Generation of the message to send to Sphinx for a search.
- def query_message(search, index, comments = '')
- message = Message.new
-
- # Mode, Limits, Sort Mode
- message.append_ints @offset, @limit, MatchModes[@match_mode],
- RankModes[@rank_mode], SortModes[@sort_mode]
- message.append_string @sort_by
-
- # Query
- message.append_string search
-
- # Weights
- message.append_int @weights.length
- message.append_ints *@weights
-
- # Index
- message.append_string index
-
- # ID Range
- message.append_int 1
- message.append_64bit_ints @id_range.first, @id_range.last
-
- # Filters
- message.append_int @filters.length
- @filters.each { |filter| message.append filter.query_message }
-
- # Grouping
- message.append_int GroupFunctions[@group_function]
- message.append_string @group_by
- message.append_int @max_matches
- message.append_string @group_clause
- message.append_ints @cut_off, @retry_count, @retry_delay
- message.append_string @group_distinct
-
- # Anchor Point
- if @anchor.empty?
- message.append_int 0
- else
- message.append_int 1
- message.append_string @anchor[:latitude_attribute]
- message.append_string @anchor[:longitude_attribute]
- message.append_floats @anchor[:latitude], @anchor[:longitude]
- end
-
- # Per Index Weights
- message.append_int @index_weights.length
- @index_weights.each do |key,val|
- message.append_string key.to_s
- message.append_int val
- end
-
- # Max Query Time
- message.append_int @max_query_time
-
- # Per Field Weights
- message.append_int @field_weights.length
- @field_weights.each do |key,val|
- message.append_string key.to_s
- message.append_int val
- end
-
- message.append_string comments
-
- message.to_s
- end
-
- # Generation of the message to send to Sphinx for an excerpts request.
- def excerpts_message(options)
- message = Message.new
-
- flags = 1
- flags |= 2 if options[:exact_phrase]
- flags |= 4 if options[:single_passage]
- flags |= 8 if options[:use_boundaries]
- flags |= 16 if options[:weight_order]
-
- message.append [0, flags].pack('N2') # 0 = mode
- message.append_string options[:index]
- message.append_string options[:words]
-
- # options
- message.append_string options[:before_match]
- message.append_string options[:after_match]
- message.append_string options[:chunk_separator]
- message.append_ints options[:limit], options[:around]
-
- message.append_array options[:docs]
-
- message.to_s
- end
-
- # Generation of the message to send to Sphinx to update attributes of a
- # document.
- def update_message(index, attributes, values_by_doc)
- message = Message.new
-
- message.append_string index
- message.append_array attributes
-
- message.append_int values_by_doc.length
- values_by_doc.each do |key,values|
- message.append_64bit_int key # document ID
- message.append_ints *values # array of new values (integers)
- end
-
- message.to_s
- end
-
- # Generates the simple message to send to the daemon for a keywords request.
- def keywords_message(query, index, return_hits)
- message = Message.new
-
- message.append_string query
- message.append_string index
- message.append_int return_hits ? 1 : 0
-
- message.to_s
- end
-
- def attribute_from_type(type, response)
- type -= AttributeTypes[:multi] if is_multi = type > AttributeTypes[:multi]
-
- case type
- when AttributeTypes[:float]
- is_multi ? response.next_float_array : response.next_float
- else
- is_multi ? response.next_int_array : response.next_int
- end
- end
- end
-end
View
53 lib/riddle/client/filter.rb
@@ -1,53 +0,0 @@
-module Riddle
- class Client
- # Used for querying Sphinx.
- class Filter
- attr_accessor :attribute, :values, :exclude
-
- # Attribute name, values (which can be an array or a range), and whether
- # the filter should be exclusive.
- def initialize(attribute, values, exclude=false)
- @attribute, @values, @exclude = attribute, values, exclude
- end
-
- def exclude?
- self.exclude
- end
-
- # Returns the message for this filter to send to the Sphinx service
- def query_message
- message = Message.new
-
- message.append_string self.attribute.to_s
- case self.values
- when Range
- if self.values.first.is_a?(Float) && self.values.last.is_a?(Float)
- message.append_int FilterTypes[:float_range]
- message.append_floats self.values.first, self.values.last
- else
- message.append_int FilterTypes[:range]
- message.append_ints self.values.first, self.values.last
- end
- when Array
- message.append_int FilterTypes[:values]
- message.append_int self.values.length
- # using to_f is a hack from the php client - to workaround 32bit
- # signed ints on x32 platforms
- message.append_ints *self.values.collect { |val|
- case val
- when TrueClass
- 1.0
- when FalseClass
- 0.0
- else
- val.to_f
- end
- }
- end
- message.append_int self.exclude? ? 1 : 0
-
- message.to_s
- end
- end
- end
-end
View
65 lib/riddle/client/message.rb
@@ -1,65 +0,0 @@
-module Riddle
- class Client
- # This class takes care of the translation of ints, strings and arrays to
- # the format required by the Sphinx service.
- class Message
- def initialize
- @message = ""
- @size_method = @message.respond_to?(:bytesize) ? :bytesize : :length
- end
-
- # Append raw data (only use if you know what you're doing)
- def append(*args)
- return if args.length == 0
-
- args.each { |arg| @message << arg }
- end
-
- # Append a string's length, then the string itself
- def append_string(str)
- @message << [str.send(@size_method)].pack('N') + str
- end
-
- # Append an integer
- def append_int(int)
- @message << [int].pack('N')
- end
-
- def append_64bit_int(int)
- @message << [int >> 32, int & 0xFFFFFFFF].pack('NN')
- end
-
- # Append a float
- def append_float(float)
- @message << [float].pack('f').unpack('L*').pack("N")
- end
-
- # Append multiple integers
- def append_ints(*ints)
- ints.each { |int| append_int(int) }
- end
-
- def append_64bit_ints(*ints)
- ints.each { |int| append_64bit_int(int) }
- end
-
- # Append multiple floats
- def append_floats(*floats)
- floats.each { |float| append_float(float) }
- end
-
- # Append an array of strings - first appends the length of the array,
- # then each item's length and value.
- def append_array(array)
- append_int(array.length)
-
- array.each { |item| append_string(item) }
- end
-
- # Returns the entire message
- def to_s
- @message
- end
- end
- end
-end
View
84 lib/riddle/client/response.rb
@@ -1,84 +0,0 @@
-module Riddle
- class Client
- # Used to interrogate responses from the Sphinx daemon. Keep in mind none
- # of the methods here check whether the data they're grabbing are what the
- # user expects - it just assumes the user knows what the data stream is
- # made up of.
- class Response
- # Create with the data to interpret
- def initialize(str)
- @str = str
- @marker = 0
- end
-
- # Return the next string value in the stream
- def next
- len = next_int
- result = @str[@marker, len]
- @marker += len
-
- return result
- end
-
- # Return the next integer value from the stream
- def next_int
- int = @str[@marker, 4].unpack('N*').first
- @marker += 4
-
- return int
- end
-
- def next_64bit_int
- high, low = @str[@marker, 8].unpack('N*N*')[0..1]
- @marker += 8
-
- return (high << 32) + low
- end
-
- # Return the next float value from the stream
- def next_float
- float = @str[@marker, 4].unpack('N*').pack('L').unpack('f*').first
- @marker += 4
-
- return float
- end
-
- # Returns an array of string items
- def next_array
- count = next_int
- items = []
- for i in 0...count
- items << self.next
- end
-
- return items
- end
-
- # Returns an array of int items
- def next_int_array
- count = next_int
- items = []
- for i in 0...count
- items << self.next_int
- end
-
- return items
- end
-
- def next_float_array
- count = next_int
- items = []
- for i in 0...count
- items << self.next_float
- end
-
- return items
- end
-
- # Returns the length of the streamed data
- def length
- @str.length
- end
- end
- end
-end
View
23 test/files/sphinx.conf
@@ -1,36 +1,23 @@
# searchd and indexer must be run from the root directory of this lib.
-
-indexer
-{
- mem_limit = 64M
-}
-
-searchd
-{
- address = localhost
- port = 3312
+searchd {
+ listen = localhost:9312
log = test/files/tmp/sphinx.log
query_log = test/files/tmp/sphinx.query.log
- read_timeout = 5
pid_file = test/files/tmp/sphinx.pid
- max_matches = 1000
}
-source items
-{
+source items {
type = xmlpipe2
xmlpipe_command = cat test/files/source.xml
}
-index items_main
-{
+index items_main {
source = items
charset_type = utf-8
path = test/files/tmp/items
}
-index items
-{
+index items {
type = distributed
local = items_main
}
View
45 test/helper.rb
@@ -1,45 +1,52 @@
-$VERBOSE = false # Shitloads of warnings in dm :(
-require 'rubygems'
-require 'extlib'
-require 'extlib/hook'
-require 'pathname'
+root = File.expand_path(File.join(File.dirname(__FILE__), '..'))
+
+require File.join(root, 'gems', 'environment')
+Bundler.require_env(:development)
require 'test/unit'
-require 'shoulda'
-base = Pathname.new(__FILE__).dirname + '..'
-files = base + 'test' + 'files'
-%w{lib test}.each{|p| $:.unshift base + p}
+begin
+ require 'shoulda'
+rescue LoadError
+ warn 'Shoulda is required for testing. Use gem bundle to install development gems.'
+ exit 1
+end
+
+base = File.join(File.dirname(__FILE__), '..')
+files = File.join(base, 'test', 'files')
+$:.unshift File.join(base, 'lib')
require 'dm-sphinx-adapter'
+class Test::Unit::TestCase
+end
+
# Sphinx runner.
Dir.chdir(base)
begin
- TCPSocket.new('localhost', '3312')
+ TCPSocket.new('localhost', '9312')
rescue
puts 'Starting Sphinx...'
- system("searchd --config #{files + 'sphinx.conf'}") || exit
+ system("searchd --config #{files}/sphinx.conf") || exit
system('ps aux | grep searchd')
end
-indexer = `indexer --config #{files + 'sphinx.conf'} --all --rotate`
+indexer = `indexer --config #{files}/sphinx.conf --all --rotate`
raise %{Re-create index failed:\n #{indexer}} if indexer =~ /error|fatal/i
sleep 1
# :default is unused at the moment.
-DataMapper.setup(:default, :adapter => 'in_memory', :database => 'dm_sphinx_adapter_test')
-DataMapper.setup(:search, :adapter => 'sphinx')
+# DataMapper.setup(:default, :adapter => 'in_memory', :database => 'dm_sphinx_adapter_test')
+# DataMapper.setup(:search, :adapter => 'sphinx')
+DataMapper.setup(:default, :adapter => 'sphinx')
class Test::Unit::TestCase
- include Extlib::Hook
-
- # after :teardown do
def teardown
- descendants = DataMapper::Resource.descendants.dup.to_a
+ descendants = DataMapper::Model.descendants.dup.to_a
while model = descendants.shift
+ next unless Object.const_defined?(model.name.to_sym)
descendants.concat(model.descendants) if model.respond_to?(:descendants)
Object.send(:remove_const, model.name.to_sym)
- DataMapper::Resource.descendants.delete(model)
+ DataMapper::Model.descendants.delete(model)
end
end
end
View
72 test/test_adapter.rb
@@ -1,68 +1,30 @@
require File.join(File.dirname(__FILE__), 'helper')
-class TestAdapter < Test::Unit::TestCase
- context 'DM::A::Sphinx::Adapter' do
+class AdapterTest < Test::Unit::TestCase
+ context 'Adapter' do
setup do
- load File.join(File.dirname(__FILE__), 'files', 'model.rb')
- @it = repository(:search)
- @resource = Item
- end
-
- context 'class' do
- should 'use default field naming convention' do
- assert_equal(
- DataMapper::NamingConventions::Field::Underscored,
- @it.adapter.field_naming_convention
- )
- end
-
- should 'use default resource naming convention' do
- assert_equal(
- DataMapper::NamingConventions::Resource::UnderscoredAndPluralized,
- @it.adapter.resource_naming_convention
- )
+ class ::Item
+ include DataMapper::Resource
+ property :id, Serial
+ property :t_string, String
+ property :t_text, Text, :lazy => false
end
+ @it = ::Item.repository.adapter
end
- context '#read_many' do
- context 'conditions' do
- should 'return all objects when nil' do
- assert_equal [1, 2, 3], @it.read_many(query).map{|d| d[:id]}
- end
-
- should 'return subset of objects for conditions' do
- assert_equal [2], @it.read_many(query(:t_string => 'two')).map{|d| d[:id]}
- end
+ context 'instance' do
+ should 'be adapter intance' do
+ assert_kind_of DataMapper::Adapters::SphinxAdapter, @it
+ assert_kind_of DataMapper::Sphinx::Adapter, @it
end
- context 'offsets' do
- should 'be able to offset the objects' do
- assert_equal [1, 2, 3], @it.read_many(query(:offset => 0)).map{|d| d[:id]}
- assert_equal [2, 3], @it.read_many(query(:offset => 1)).map{|d| d[:id]}
- assert_equal [], @it.read_many(query(:offset => 3))
- end
+ should 'get all items' do
+ assert_equal 3, Item.all.size # should invoke full scan mode.
end
- context 'limits' do
- should 'be able to limit the objects' do
- assert_equal [1], @it.read_many(query(:limit => 1)).map{|d| d[:id]}
- assert_equal [1, 2], @it.read_many(query(:limit => 2)).map{|d| d[:id]}
- end
- end
- end
-
- context '#read_one' do
- should 'return the first object of a #read_many' do
- assert_equal @it.read_many(query).first, @it.read_one(query)
-
- query = query(:t_string => 'two')
- assert_equal @it.read_many(query).first, @it.read_one(query)
+ should 'be sphinx query' do
+ assert_kind_of DataMapper::Sphinx::Query, DataMapper::Query.new(::Item.repository, ::Item, {})
end
end
end
-
- protected
- def query(conditions = {})
- DataMapper::Query.new(repository(:search), @resource, conditions)
- end
-end
+end # AdapterTest
View
36 test/test_attribute.rb
@@ -1,36 +0,0 @@
-require File.join(File.dirname(__FILE__), 'helper')
-
-class TestAttribute < Test::Unit::TestCase
- context 'DM::A::Sphinx::Attribute instance' do
- should 'typecast DateTime to Integer'
- should 'typecast Date to Integer'
- should 'typecast Time to Integer'
- should 'typecast BigDecimal to Float'
- end
-
- context 'DM::A::Sphinx::Resource#attribute class method' do
- setup do
- class ::Resource
- include DataMapper::SphinxResource
- end
- end
-
- DataMapper::Adapters::Sphinx::Attribute::TYPES.each do |type|
- should "accept a #{type} type" do
- assert_nothing_raised do
- Resource.class_eval do
- attribute :name, type
- end
- end
- end
- end
-
- should 'raise ArgumentError for unsupported type' do
- assert_raise(ArgumentError) do
- Resource.class_eval do
- attribute :name, Test::Unit::TestCase
- end
- end
- end
- end
-end
View
26 test/test_collection.rb
@@ -1,26 +0,0 @@
-require File.join(File.dirname(__FILE__), 'helper')
-
-class TestAdapter < Test::Unit::TestCase
- context 'DM::A::Sphinx::Collection instance' do
- setup do
- load File.join(File.dirname(__FILE__), 'files', 'model.rb')
- @it = repository(:search)
- @resource = Item
- end
-
- should 'have total' do
- assert_equal 3, @it.read_many(query).total
- assert_equal 1, @it.read_many(query(:t_string => 'two')).total
- end
-
- should 'have words' do
- words = {'two' => {:docs => 1, :hits => 2}}
- assert_equal words, @it.read_many(query(:t_string => 'two')).words
- end
- end
-
- protected
- def query(conditions = {})
- DataMapper::Query.new(repository(:search), @resource, conditions)
- end
-end
View
30 test/test_index.rb
@@ -1,30 +0,0 @@
-require File.join(File.dirname(__FILE__), 'helper')
-
-class TestIndex < Test::Unit::TestCase
- context 'DM::A::Sphinx::Index instance' do
- should 'respond to delta?'
- end
-
- context 'DM::A::Sphinx::Resource class' do
- setup do
- class ::Resource
- include DataMapper::SphinxResource
- end
- end
-
- context '#index method' do
- should 'append an index' do
- assert_nothing_raised do
- Resource.class_eval do
- index :name
- end
- end
- end
- end
-
- context '#sphinx_indexes method' do
- should 'return DM::A::Sphinx::Index objects'
- should 'return delta indexes at the end of the list'
- end
- end
-end
View
61 test/test_query.rb
@@ -1,61 +0,0 @@
-require File.join(File.dirname(__FILE__), 'helper')
-
-class TestQuery < Test::Unit::TestCase
- context 'DM::A::Sphinx::Query conditions' do
- setup do
- load File.join(File.dirname(__FILE__), 'files', 'model.rb')
- @adapter = repository(:search)
- @resource = Item
- end
-
- should 'treat nil operator as extended field match' do
- assert_equal '@t_string "foo"', query_string(:t_string => 'foo')
- end
-
- should 'treat .eql operator as extended field match' do
- assert_equal '@t_string "foo"', query_string(:t_string.eql => 'foo')
- end
-
- should 'treat .like operator as extended field match' do
- assert_equal '@t_string "foo"', query_string(:t_string.like => 'foo')
- end
-
- should 'treat Array as extended field AND match' do
- assert_equal '@t_string "foo bar"', query_string(:t_string => %w{foo bar})
- end
-
- should 'treat .not opeartor as extended field NOT match' do
- assert_equal '@t_string -"foo"', query_string(:t_string.not => 'foo')
- end
-
- should 'treat Array .not operator as extended field NOT match' do
- assert_equal '@t_string -"foo bar"', query_string(:t_string.not => %w{foo bar})
- end
-
- should 'treat .in operator as extended OR match' do
- assert_equal '@t_string ("foo" | "bar")', query_string(:t_string.in => %w{foo bar})
- end
-
- should 'treat multiple .eql operators as AND search' do
- # When is DM going to switch conditions to an array? :(
- assert(/(?:@t_string "b" )?@t_string "a"(?: @t_string "b")?/.match(
- query_string(:t_string.eql => 'a', :t_string.eql => 'b')
- ))
- end
-
- should 'leave raw conditions as they are' do
- assert_equal '"foo bar"~10', query_string(:conditions => ['"foo bar"~10'])
- end
- end
-
- protected
- def query(conditions = {})
- DataMapper::Adapters::Sphinx::Query.new(
- DataMapper::Query.new(@adapter, @resource, conditions)
- )
- end
-
- def query_string(conditions = {})
- query(conditions).to_s
- end
-end
View
17 test/test_resource.rb