Permalink
Browse files

Hopefully this will be a more transparent interface to Sphinx (and easier to debug), which will mean no more hacking Ultrasphinx.
  • Loading branch information...
1 parent f7a85cf commit 78d8a938a777ea6401ff07d42fa5977e088abb0d @ninjudd committed Mar 16, 2009
Showing with 256 additions and 5 deletions.
  1. +2 −2 README
  2. +3 −3 Rakefile
  3. +196 −0 lib/minisphinx.rb
  4. +55 −0 lib/minisphinx/charset.rb
View
4 README
@@ -1,9 +1,9 @@
minisphinx
==========
-Description goes here.
+An alternative to Ultrasphinx. But nobody will use it until I write some documentation.
COPYRIGHT
=========
-Copyright (c) 2008 Justin Balthrop. See LICENSE for details.
+Copyright (c) 2009 Justin Balthrop. See LICENSE for details.
View
@@ -6,10 +6,10 @@ begin
require 'jeweler'
Jeweler::Tasks.new do |s|
s.name = "minisphinx"
- s.summary = %Q{TODO}
- s.email = "justin@geni.com"
+ s.summary = %Q{An alternative to ultrasphinx. Index ActiveRecord models with the sphinx search engine.}
+ s.email = "code@justinbalthrop.com"
s.homepage = "http://github.com/ninjudd/minisphinx"
- s.description = "TODO"
+ s.description = "An alternative to ultrasphinx."
s.authors = ["Justin Balthrop"]
end
rescue LoadError
View
@@ -0,0 +1,196 @@
+module Minisphinx
+ def sphinx_source(name, opts)
+ opts[:model_class] ||= self
+ Minisphinx.sources << Source.new(name, opts)
+ end
+
+ def sphinx_index(name, opts = {})
+ Minisphinx.indexes << Index.new(name, opts)
+ end
+
+ def self.indexes; @indexes ||= []; end
+ def self.sources; @sources ||= []; end
+
+ def self.configure(opts)
+ path = opts[:path] || RAILS_ROOT + '/config'
+ template = ['default', RAILS_ENV].collect {|base| RAILS_ROOT + "/config/sphinx/#{base}.conf"}.detect {|file| File.exists?(file)}
+
+ File.open(path + '/sphinx.conf', 'w') do |file|
+ file << "# Autogenerated by minisphinx at #{Time.now}"
+ file << ERB.new(IO.read(template)).result(binding)
+ file << "\n# Sources\n"
+ sources.each do |source|
+ file << source.to_s + "\n\n"
+ end
+ file << "\n# Indexes\n"
+ indexes.each do |index|
+ file << index.to_s + "\n\n"
+ end
+ end
+ end
+
+ def self.config_block(head, lines)
+ "#{head}\n{\n #{lines.flatten.compact.join("\n ")}\n}"
+ end
+
+ class Source
+ attr_reader :name, :table_name, :model_class, :fetch_key, :db, :fields, :joins, :config
+
+ def initialize(name, opts)
+ @name = name
+ @model_class = opts.delete(:model_class)
+ @table_name = opts.delete(:table_name) || model_class.table_name # table_name is required
+ @fetch_key = opts.delete(:fetch_key) || 'id'
+
+ @db = opts[:db] || model_class.connection.instance_variable_get("@config")
+ @db = ActiveRecord::Base.configurations["#{db}_#{RAILS_ENV}"] unless db.kind_of?(Hash)
+
+ @fields = initialize_fields(opts)
+ @joins = opts.delete(:joins) || []
+
+ (opts.delete(:include) || []).each do |include_opts|
+ @fields.concat initialize_fields(include_opts)
+ @joins.concat include_opts.delete(:joins) || []
+ end
+ raise 'at least one field required' if @fields.empty?
+ @fields.sort!
+
+ @config = self.class.config.merge(opts)
+ end
+
+ def table_name
+ @table_name ||= model_class.table_name
+ end
+
+ def type
+ db[:adapter] == 'postgresql' ? 'pgsql' : db[:adapter]
+ end
+
+ def to_s
+ Minisphinx.config_block("source #{name}", [
+ "type = #{type}",
+ config.collect do |key, value|
+ "sql_#{key} = #{value}"
+ end,
+ "sql_db = #{db[:database]}",
+ "sql_host = #{db[:host]}",
+ "sql_pass = #{db[:password]}",
+ "sql_user = #{db[:username]}",
+ "sql_query_range = #{sql_query_range}",
+ "sql_query = #{sql_query}",
+ "sql_query_info = #{sql_query_info}",
+ fields.collect do |field|
+ "sql_attr_#{field.type} = #{field.name}" if field.type != :string
+ end,
+ ])
+ end
+
+ def sql_query_range
+ "SELECT coalesce(MIN(#{fetch_key}),1)::bigint, coalesce(MAX(#{fetch_key}),1)::bigint FROM #{table_name}"
+ end
+
+ def sql_query
+ "SELECT #{table_name}.id AS doc_id, #{fields.join(', ')} #{joins.join(' ')} WHERE #{fetch_key} >= $start AND #{fetch_key} <= $end"
+ end
+
+ def sql_query_info
+ "SELECT * FROM #{table_name} WHERE search_profiles.id = $id"
+ end
+
+ def self.config
+ @config ||= {
+ :range_step => 5000,
+ :ranged_throttle => 0,
+ }
+ end
+
+ private
+
+ def initialize_fields(opts)
+ (opts.delete(:fields) || []).collect do |field_opts|
+ field_opts = {:field => field_opts} unless field_opts.kind_of?(Hash)
+ field_opts[:table_name] = table_name
+ field_opts[:model_class] = model_class
+ [Field.new(field_opts), field_opts[:sortable] && Field.new(field_opts.merge(:type => :str2ordinal, :suffix => 'sortable'))]
+ end.flatten.compact
+ end
+ end
+
+ class Field
+ attr_reader :table_name, :model_class, :field, :name, :type
+
+ TYPE_MAP = {
+ :integer => :uint,
+ :bigint => :bigint,
+ :float => :float,
+ :boolean => :bool,
+ :date => :timestamp,
+ :datetime => :timestamp,
+ :timestamp => :timestamp,
+ :string => :string,
+ :text => :string,
+ }
+
+ def initialize(opts)
+ @model_class = opts[:model_class]
+ @table_name = opts[:table_name]
+ @table_name ||= model_class.table_name if model_class # table_name is not required
+
+ @field = opts[:field]
+ @name = opts[:as] || opts[:field]
+ @name = "#{name}_#{opts[:suffix]}" if opts[:suffix]
+ @field = "#{table_name}.#{field}" if not field.index('.')
+ @field = opts[:transform] % field if opts[:transform]
+ @type = opts[:type]
+ end
+
+ def <=>(other)
+ # Sphinx has a bug that messes up your index unless str2ordinal fields come first.
+ if type != other.type
+ type == :str2ordinal && -1 || other.type == :str2ordinal && 1 || type.to_s <=> other.type.to_s
+ else
+ name <=> other.name
+ end
+ end
+
+ def type
+ @type ||= (model_class and column = model_class.columns_hash[name]) && TYPE_MAP[column.type] || :string
+ end
+
+ def to_s
+ "#{field} AS #{name}"
+ end
+ end
+
+ class Index
+ attr_reader :name, :sources, :charset, :config
+
+ def initialize(name, opts)
+ @name = name
+ @config = self.class.config.merge(opts)
+ @sources = Array(config.delete(:source)) + Array(config.delete(:sources))
+ @charset = Minisphinx::Charset.new(config.delete(:charset)) if config[:charset]
+ end
+
+ def to_s
+ Minisphinx.config_block("index #{name}", [
+ sources.collect do |source|
+ "source = #{source}"
+ end,
+ config.collect do |key, value|
+ "#{key} = #{value}"
+ end,
+ charset && "charset_table = #{charset}",
+ ])
+ end
+
+ def self.config
+ @config ||= {
+ :charset_type => 'utf-8',
+ :min_word_len => 1,
+ :html_strip => 0,
+ :docinfo => 'extern',
+ }
+ end
+ end
+end
View
@@ -0,0 +1,55 @@
+module Minisphinx
+ class Charset
+ attr_reader :type, :only, :except
+
+ def initialize(opts)
+ @type = opts[:type] || :standard
+ @only = Array(opts[:only]).to_set if opts[:only]
+ @except = Array(opts[:except]).to_set if opts[:except]
+ end
+
+ def self.charset
+ if @charset.nil?
+ @charset = DeepHash.new
+ YAML.load_file(RAILS_ROOT + '/config/sphinx/charset.yml').each do |type, table|
+ type = type.to_sym
+ table.each do |group, charset|
+ group = group.to_sym
+ charset.split(',').each do |char|
+ key, value = char.strip.split('->')
+ @charset[type][group][key] = value
+ end
+ end
+ end
+ end
+ @charset
+ end
+
+ MAX_PER_LINE = 50
+ def to_s
+ chars = []
+ self.class.charset[type].each do |group, charset|
+ next if except and except.include?(group)
+ next if only and not only.include?(group)
+
+ charset.each do |key, value|
+ chars << (value ? "#{key}->#{value}" : key)
+ end
+ end
+
+ lines = []
+ chars.sort.each_slice(MAX_PER_LINE) do |line_chars|
+ lines << line_chars.join(', ')
+ end
+ lines.join(", \\\n")
+ end
+ end
+end
+
+class DeepHash < Hash
+ def initialize
+ super do |hash, key|
+ hash[key] = DeepHash.new
+ end
+ end
+end

0 comments on commit 78d8a93

Please sign in to comment.