
Using new solr schema

1 parent 2573f4f commit a9bce88939be8702816d659bdcf7e844296cb163 @jcoyne committed Jan 20, 2013
@@ -5,7 +5,7 @@ Gemfile.lock
*.sqlite3
*.log
*~
-*.swp
+*.sw[pon]
pkg/
coverage/*
@@ -1,3 +1,15 @@
+h2. 3.0.0
+suffix changes:
+ _s -> _si
+ _t -> _tesim
+ _dt -> _dtsi
+ _i -> _isim
+ _sort -> _ssi
+ _display -> _sim
+ _facet -> _sim
+ _unstem_search -> _tim
+
+
h2. 2.1.0
#11 There should only be one instance of the field mapper. It's now at Solrizer.default_field_mapper
Extract create_and_insert_terms into Solrizer::Common. This can be used for RDF datastreams
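For orientation, a minimal sketch of what the suffix changes above mean in practice. Field names here are invented for illustration; Solrizer.default_field_mapper is the singleton mentioned in the 2.1.0 entry, and the index-type symbols resolve to the descriptors introduced further down in this commit:

    mapper = Solrizer.default_field_mapper
    mapper.solr_name('title',   :string,  :searchable)  #=> "title_tesim"   (was "title_t")
    mapper.solr_name('title',   :string,  :sortable)    #=> "title_ssi"     (was "title_sort")
    mapper.solr_name('created', :date,    :searchable)  #=> "created_dtsi"  (was "created_dt")
    mapper.solr_name('pages',   :integer, :searchable)  #=> "pages_isim"    (was "pages_i")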
@@ -0,0 +1,69 @@
+module Solrizer
+ module DefaultDescriptors
+
+ # Produces a _sim suffix
+ def self.facetable
+ @facetable ||= Descriptor.new(:string, :indexed, :multivalued)
+ end
+
+  # The most interesting case, because the suffix produced depends on the type parameter.
+  # Produces suffixes:
+ # _tesim - for strings or text fields
+ # _dtsim - for dates
+ # _isim - for integers
+ def self.searchable
+ @searchable ||= Descriptor.new(searchable_field_definition, converter: searchable_converter)
+ end
+
+ # Produces a _ssi suffix
+ def self.sortable
+ @sortable ||= Descriptor.new(:string, :indexed, :stored)
+ end
+
+ # Produces a _sim suffix
+ def self.displayable
+ @displayable ||= Descriptor.new(:string, :indexed, :multivalued)
+ end
+
+ # Produces a _tim suffix (used to be _unstem)
+ def self.unstemmed_searchable
+ @unstemmed_searchable ||= Descriptor.new(:text, :indexed, :multivalued)
+ end
+
+ def self.simple
+ @simple ||= Descriptor.new(lambda {|field_type| [field_type, :indexed]})
+ end
+ protected
+
+ def self.searchable_field_definition
+ lambda do |type|
+ type = :text_en if [:string, :text].include?(type) # for backwards compatibility with old solr schema
+ vals = [type, :indexed, :stored]
+ vals << :multivalued unless type == :date
+ vals
+ end
+ end
+
+ def self.searchable_converter
+ lambda do |type|
+ case type
+ when :date
+ lambda { |val| iso8601_date(val)}
+ end
+ end
+ end
+
+
+ def self.iso8601_date(value)
+ begin
+ if value.is_a?(Date)
+ DateTime.parse(value.to_s).to_time.utc.iso8601
+ elsif !value.empty?
+ DateTime.parse(value).to_time.utc.iso8601
+ end
+ rescue ArgumentError => e
+ raise ArgumentError, "Unable to parse `#{value}' as a date-time object"
+ end
+ end
+ end
+end
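As a quick sanity check of the descriptors defined above (field names invented for illustration), each factory returns a Descriptor whose name_and_converter yields the Solr field name plus an optional value converter:

    Solrizer::DefaultDescriptors.sortable.name_and_converter('title', :string)
    #=> ["title_ssi", nil]
    Solrizer::DefaultDescriptors.searchable.name_and_converter('date_uploaded', :date)
    #=> ["date_uploaded_dtsi", #<Proc ...>]   # the Proc casts values to ISO 8601 via iso8601_date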
@@ -0,0 +1,58 @@
+module Solrizer
+ class Descriptor
+ attr_reader :index_type
+ def initialize(*args)
+ if args.last.kind_of? Hash
+ opts = args.pop
+ @converter = opts[:converter]
+ end
+ @index_type = args
+      raise Solrizer::InvalidIndexDescriptor, "Invalid index type passed to Solrizer.solr_name. It should be an array like [:string, :indexed, :stored, :multivalued]. You provided: `#{@index_type}'" unless index_type.kind_of? Array
+ end
+
+ def name_and_converter(field_name, field_type)
+ [field_name.to_s + suffix(field_type), converter(field_type)]
+ end
+
+ protected
+ def suffix(field_type)
+ evaluated_type = index_type.first.kind_of?(Proc) ? index_type.first.call(field_type) : index_type.dup
+ stored_suffix = config[:stored_suffix] if evaluated_type.delete(:stored)
+ index_suffix = config[:index_suffix] if evaluated_type.delete(:indexed)
+ multivalued_suffix = config[:multivalued_suffix] if evaluated_type.delete(:multivalued)
+ index_datatype = evaluated_type.first
+ raise Solrizer::InvalidIndexDescriptor, "Missing datatype for #{evaluated_type}" unless index_datatype
+ type_suffix = config[:type_suffix].call(index_datatype)
+ raise Solrizer::InvalidIndexDescriptor, "Invalid datatype `#{index_datatype.inspect}'. Must be one of: :date, :text, :text_en, :string, :integer" unless type_suffix
+
+ suffix = [config[:suffix_delimiter], type_suffix, stored_suffix, index_suffix, multivalued_suffix].join
+ end
+
+ def converter(field_type)
+ @converter.call(field_type) if @converter
+ end
+
+ private
+ def config
+ @config ||=
+ {suffix_delimiter: '_',
+ type_suffix: lambda do |type|
+ case type
+          when :string, :symbol # TODO `:symbol' usage ought to be deprecated
+ 's'
+ when :text
+ 't'
+ when :text_en
+ 'te'
+ when :date
+ 'dt'
+ when :integer
+ 'i'
+ end
+ end,
+ stored_suffix: 's',
+ index_suffix: 'i',
+ multivalued_suffix: 'm'}
+ end
+ end
+end
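A hedged sketch of how Descriptor assembles the suffix from that config hash (field names invented): the delimiter, then the datatype letter(s), then 's', 'i', and 'm' for stored, indexed, and multivalued, in that order:

    Solrizer::Descriptor.new(:string, :indexed, :multivalued).name_and_converter('subject', :string)
    #=> ["subject_sim", nil]         # '_' + 's' (string) + 'i' (indexed) + 'm' (multivalued)
    Solrizer::Descriptor.new(:integer, :indexed, :stored).name_and_converter('page_count', :integer)
    #=> ["page_count_isi", nil]      # '_' + 'i' (integer) + 's' (stored) + 'i' (indexed)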
@@ -1,7 +1,11 @@
require "loggable"
require 'active_support'
+require 'active_support/core_ext/class/attribute'
module Solrizer
-
+
+ class SolrizerError < RuntimeError; end #nodoc#
+ class InvalidIndexDescriptor < SolrizerError; end #nodoc#
+ class UnknownIndexMacro < SolrizerError; end #nodoc#
# Maps Term names and values to Solr fields, based on the Term's data type and any index_as options.
#
# The basic structure of a mapper is:
@@ -217,6 +221,9 @@ def self.apply_instance_init_actions(instance)
# ------ Instance methods ------
attr_reader :id_field, :default_index_types, :mappings
+ class_attribute :descriptors
+ self.descriptors = [DefaultDescriptors]
+
def initialize
@mappings = {}
@@ -226,9 +233,41 @@ def initialize
# Given a specific field name, data type, and index type, returns the corresponding solr name.
- def solr_name(field_name, field_type, index_type = :searchable)
- name, mapping, data_type_mapping = solr_name_and_mappings(field_name, field_type, index_type)
- name
+  # TODO: field_type is the input format; maybe we could just detect it?
+ # @param index_type is a FieldDescriptor
+ def solr_name(field_name, field_type, index_type = nil)
+ # if they don't provide an index type, give them a basic indexed field for that type.
+ # This is primarily to provide backward compatibility. TODO deprecate this behavior
+ index_type = index_type_macro(:simple) if index_type.nil?
+ solr_name_and_converter(field_name, field_type, index_type).first
+ end
+
+ # @param index_type [Symbol]
+ # search through the descriptors (class attribute) until a module is found that responds to index_type, then call it.
+ def index_type_macro(index_type)
+ klass = self.class.descriptors.find { |klass| klass.respond_to? index_type}
+ if klass
+ klass.send(index_type)
+ else
+ raise UnknownIndexMacro, "Unable to find `#{index_type}' in #{self.class.descriptors}"
+ end
+ end
+
+ # @param index_type is a FieldDescriptor or a symbol that points to a method that returns a field descriptor
+ def solr_name_and_converter(field_name, field_type, index_type)
+ index_type = case index_type
+ when Symbol
+ index_type_macro(index_type)
+ when Array
+        raise "Passing an array as the index type is not yet supported"
+ #IndexDescriptors::Descriptor.new(*index_type)
+ else
+ index_type
+ end
+
+
+ raise InvalidIndexDescriptor, "index type should be an IndexDescriptor, you passed: #{index_type}" unless index_type.kind_of? Descriptor
+ index_type.name_and_converter(field_name, field_type)
end
# Given a field name-value pair, a data type, and an array of index types, returns a hash of
@@ -238,7 +277,6 @@ def solr_names_and_values(field_name, field_value, field_type, index_types)
# Determine the set of index types, adding defaults and removing not_xyz
index_types ||= []
- index_types += default_index_types
index_types.uniq!
index_types.dup.each do |index_type|
if index_type.to_s =~ /^not_(.*)/
@@ -253,19 +291,20 @@ def solr_names_and_values(field_name, field_value, field_type, index_types)
index_types.each do |index_type|
# Get mapping for field
- name, mapping, data_type_mapping = solr_name_and_mappings(field_name, field_type, index_type)
+ name, converter = solr_name_and_converter(field_name, field_type, index_type)
next unless name
# Is there a custom converter?
- value = if data_type_mapping && data_type_mapping.converter
- converter = data_type_mapping.converter
+ # TODO instead of a custom converter, look for input data type and output data type. Create a few methods that can do that cast.
+
+ value = if converter
if converter.arity == 1
converter.call(field_value)
else
converter.call(field_value, field_name)
end
else
- field_value
+ field_value.to_s
end
# Add mapped name & value, unless it's a duplicate
@@ -277,23 +316,6 @@ def solr_names_and_values(field_name, field_value, field_type, index_types)
end
private
-
- def solr_name_and_mappings(field_name, field_type, index_type)
- field_name = field_name.to_s
- mapping = @mappings[index_type]
- unless mapping
- logger.debug "Unknown index type '#{index_type}' for field #{field_name}"
- return nil
- end
-
- data_type_mapping = mapping.data_types[field_type] || mapping.data_types[:default]
-
- suffix = data_type_mapping.opts[:suffix] if data_type_mapping
- suffix ||= mapping.opts[:suffix]
- name = field_name + suffix
-
- [name, mapping, data_type_mapping]
- end
class IndexTypeMapping
attr_accessor :opts, :data_types
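Putting the field-mapper changes together, a minimal sketch of the ways solr_name can now be invoked (field name invented; results follow from the descriptors above). index_type_macro walks the descriptors class attribute, which defaults to [DefaultDescriptors], so an application could register its own module of descriptor factories by adding it to that list:

    mapper = Solrizer.default_field_mapper
    mapper.solr_name('title', :string, :facetable)   #=> "title_sim"  (looked up via DefaultDescriptors.facetable)
    mapper.solr_name('title', :string,
      Solrizer::Descriptor.new(:string, :indexed, :stored))
                                                     #=> "title_ssi"  (explicit Descriptor)
    mapper.solr_name('title', :string)               #=> "title_si"   (no index type: falls back to :simple)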
@@ -14,26 +14,26 @@ class Foo
solr_doc = {}
directive = Solrizer::Directive.new(:string, [:displayable, :searchable, :sortable] )
Foo.create_and_insert_terms('my_name', 'value', directive, solr_doc)
- solr_doc.should == {'my_name_t' => ['value'], 'my_name_sort' => ['value'], 'my_name_display' => ['value']}
+ solr_doc.should == {'my_name_sim' => ['value'], 'my_name_ssi' => ['value'], 'my_name_tesim' => ['value']}
end
it "should handle dates that are searchable" do
solr_doc = {}
directive = Solrizer::Directive.new(:date, [:searchable] )
Foo.create_and_insert_terms('my_name', Date.parse('2013-01-10'), directive, solr_doc)
- solr_doc.should == {'my_name_dt' => ['2013-01-10T00:00:00Z']}
+ solr_doc.should == {'my_name_dtsi' => ['2013-01-10T00:00:00Z']}
end
it "should handle dates that are displayable" do
solr_doc = {}
directive = Solrizer::Directive.new(:date, [:displayable])
Foo.create_and_insert_terms('my_name', Date.parse('2013-01-10'), directive, solr_doc)
- solr_doc.should == {'my_name_display' => ['2013-01-10']}
+ solr_doc.should == {'my_name_sim' => ['2013-01-10']}
end
it "should handle dates that are sortable" do
solr_doc = {}
directive = Solrizer::Directive.new(:date, [:sortable])
Foo.create_and_insert_terms('my_name', Date.parse('2013-01-10'), directive, solr_doc)
- solr_doc.should == {'my_name_sort' => ['2013-01-10']}
+ solr_doc.should == {'my_name_ssi' => ['2013-01-10']}
end
end