add Chinese support

raecoo committed Oct 13, 2009
1 parent 7ab73a9 commit b6338b65002fd538ec0ecc617e0768b507673f84
+h1. Thinking Sphinx Chinese
h2. Usage
+Add the following options to sphinx.yml, then regenerate the index and restart the Sphinx daemon:
+charset_type: zh_cn.utf-8
+charset_dictpath: <your/dict/full/path>
+Sphinx Chinese Patch:
+For more information about the available options, please refer to:
SourceOptions = %w( mysql_connect_flags sql_range_step sql_query_pre
sql_query_post sql_ranged_throttle sql_query_post_index )
- IndexOptions = %w( charset_table charset_type docinfo enable_star
+ IndexOptions = %w( charset_table charset_type charset_dictpath docinfo enable_star
exceptions html_index_attrs html_remove_elements html_strip ignore_chars
min_infix_len min_prefix_len min_word_len mlock morphology ngram_chars
ngram_len phrase_boundary phrase_boundary_step preopen stopwords
attr_accessor :config_file, :searchd_log_file, :query_log_file,
:pid_file, :searchd_file_path, :address, :port, :allow_star,
:database_yml_file, :app_root, :bin_path, :model_directories,
- :delayed_job_priority, :searchd_binary_name, :indexer_binary_name
+ :delayed_job_priority, :searchd_binary_name, :indexer_binary_name,
+ :charset_dictpath
attr_accessor :source_options, :index_options
def utf8?
- @index.options[:charset_type] == "utf-8"
+ @index.options[:charset_type] =~ /utf-8|zh_cn.utf-8/
class Configuration
class Index < Riddle::Configuration::Section
self.settings = [:source, :path, :docinfo, :mlock, :morphology,
- :stopwords, :wordforms, :exceptions, :min_word_len, :charset_type,
+ :stopwords, :wordforms, :exceptions, :min_word_len, :charset_type, :charset_dictpath,
:charset_table, :ignore_chars, :min_prefix_len, :min_infix_len,
:prefix_fields, :infix_fields, :enable_star, :ngram_len, :ngram_chars,
:phrase_boundary, :phrase_boundary_step, :html_strip,
:html_index_attrs, :html_remove_elements, :preopen]
attr_accessor :name, :parent, :sources, :path, :docinfo, :mlock,
:morphologies, :stopword_files, :wordform_files, :exception_files,
- :min_word_len, :charset_type, :charset_table, :ignore_characters,
+ :min_word_len, :charset_type, :charset_table, :ignore_characters, :charset_dictpath,
:min_prefix_len, :min_infix_len, :prefix_field_names,
:infix_field_names, :enable_star, :ngram_len, :ngram_characters,
:phrase_boundaries, :phrase_boundary_step, :html_strip,

