Skip to content
This repository
tag: v3.0.0.beta2
Fetching contributors…

Cannot retrieve contributors at this time

file 62 lines (53 sloc) 2.654 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
# encoding: utf-8

require 'active_support/core_ext/module/attribute_accessors'

module ActiveSupport #:nodoc:
  # Namespace for multibyte string support: lazily loaded helper constants, normalization settings,
  # the configurable proxy class and the per-encoding character validation patterns.
  module Multibyte
    # These constants are loaded on first reference from the listed files.
    autoload :EncodingError, 'active_support/multibyte/exceptions'
    autoload :Chars, 'active_support/multibyte/chars'
    autoload :UnicodeDatabase, 'active_support/multibyte/unicode_database'
    autoload :Codepoint, 'active_support/multibyte/unicode_database'
    autoload :UCD, 'active_support/multibyte/unicode_database'

    # A list of all available normalization forms. See http://www.unicode.org/reports/tr15/tr15-29.html for more
    # information about normalization.
    NORMALIZATION_FORMS = [:c, :kc, :d, :kd]

    # The Unicode version that is supported by the implementation
    UNICODE_VERSION = '5.1.0'

    # The default normalization used for operations that require normalization. It can be set to any of the
    # normalizations in NORMALIZATION_FORMS.
    #
    # Example:
    #   ActiveSupport::Multibyte.default_normalization_form = :c
    mattr_accessor :default_normalization_form
    self.default_normalization_form = :kc

    # The proxy class returned when calling mb_chars. You can use this accessor to configure your own proxy
    # class so you can support other encodings. See the ActiveSupport::Multibyte::Chars implementation for
    # an example how to do this.
    #
    # Example:
    #   ActiveSupport::Multibyte.proxy_class = CharsForUTF32
    def self.proxy_class=(klass)
      @proxy_class = klass
    end

    # Returns the current proxy class. Falls back to (and memoizes) ActiveSupport::Multibyte::Chars
    # when no proxy class has been assigned.
    def self.proxy_class
      @proxy_class ||= ActiveSupport::Multibyte::Chars
    end

    # Regular expressions that describe valid byte sequences for a single character, keyed by
    # encoding name. The patterns are byte oriented (+n+ flag) and use extended syntax (+x+ flag),
    # so the whitespace inside them is insignificant.
    VALID_CHARACTER = {
      # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
      #
      # The lead byte 0xED is handled separately and only allows a second byte in 0x80..0x9F:
      # 0xED 0xA0..0xBF would encode the surrogate range U+D800..U+DFFF, which is not valid UTF-8.
      'UTF-8' => /\A(?:
                  [\x00-\x7f]                                             |
                  [\xc2-\xdf]         [\x80-\xbf]                         |
                  \xe0                [\xa0-\xbf] [\x80-\xbf]             |
                  [\xe1-\xec\xee\xef] [\x80-\xbf] [\x80-\xbf]             |
                  \xed                [\x80-\x9f] [\x80-\xbf]             |
                  \xf0                [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
                  [\xf1-\xf3]         [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
                  \xf4                [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
      # Quick check for valid Shift-JIS characters, disregards the odd-even pairing
      'Shift_JIS' => /\A(?:
                  [\x00-\x7e\xa1-\xdf] |
                  [\x81-\x9f\xe0-\xef] [\x40-\x7e\x80-\x9e\x9f-\xfc])\z /xn
    }
  end
end

require 'active_support/multibyte/utils'
Something went wrong with that request. Please try again.