Permalink
Browse files

big refactor of string_extensions conversions that in hindsight i should have split apart cleaner
  • Loading branch information...
1 parent afc34c9 commit 430efc1cc54bf666c230087e9b04958a1d148642 @rsl committed Mar 22, 2013
@@ -1,5 +1,6 @@
# encoding: UTF-8
+require 'stringex/localization/converter'
require 'stringex/localization/default_conversions'
module Stringex
@@ -86,8 +87,11 @@ def reset!
@backend = @translations = @locale = @default_locale = nil
end
- def currencies_supported_regex
- Regexp.new DefaultConversions::CURRENCIES_SUPPORTED.map{|x| Regexp.escape(x)}.join('|')
+ # Builds a Converter for +string+, evaluates the given block in the
+ # converter's context (so the block can call the converter's methods
+ # directly), re-attaches the original leading/trailing whitespace via
+ # smart_strip! and returns the converted string.
+ #
+ # string  - the text to convert; the Converter works on its own copy.
+ # options - forwarded to Converter.new, which merges them over the
+ #           default settings.
+ # block   - conversion steps to run against the converter; optional.
+ def convert(string, options = {}, &block)
+ # Pass the options through; previously they were accepted but dropped.
+ converter = Converter.new(string, options)
+ # instance_exec raises when called without a block, so guard it.
+ converter.instance_exec(&block) if block
+ converter.smart_strip!
+ converter.string
end
private
@@ -106,4 +110,4 @@ def default_conversion(scope, key)
end
end
end
-end
+end
@@ -0,0 +1,148 @@
+# encoding: UTF-8
+
+module Stringex
+ module Localization
+ module ConversionExpressions
+ ABBREVIATION = /(\s|^)([[:alpha:]](\.[[:alpha:]])+(\.?)[[:alpha:]]*(\s|$))/
+
+ ACCENTED_HTML_ENTITY = /&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/
+
+ APOSTROPHE = /(^|[[:alpha:]])'|`([[:alpha:]]|$)/
+
+ CHARACTERS = {
+ :and => /\s*&\s*/,
+ :at => /\s*@\s*/,
+ :degrees => /\s\s*/,
+ :divide => /\s\s*/,
+ :dot => /(\S|^)\.(\S)/,
+ :ellipsis => /\s*\.{3,}\s*/,
+ :equals => /\s*=\s*/,
+ :number => /\s*#/,
+ :percent => /\s*%\s*/,
+ :plus => /\s*\+\s*/,
+ :slash => /\s*(\\|\/|/)\s*/,
+ :star => /\s*\*\s*/,
+ }
+
+ # Things that just get converted to spaces
+ CLEANUP_CHARACTERS = /[\.,:;()\[\]\/\?!\^'ʼ"_\|]/
+ CLEANUP_HTML_ENTITIES = /&[^;]+;/
+
+ CURRENCIES_SUPPORTED_SIMPLE = {
+ :dollars => /\$/,
+ :euros => //,
+ :pounds => /£/,
+ :yen => /¥/,
+ }
+ CURRENCIES_SUPPORTED_COMPLEX = {
+ :dollars => :dollars_cents,
+ :euros => :euros_cents,
+ :pounds => :pounds_pence,
+ }
+ CURRENCIES_SUPPORTED = Regexp.new(CURRENCIES_SUPPORTED_SIMPLE.values.join('|'))
+ CURRENCIES_SIMPLE = CURRENCIES_SUPPORTED_SIMPLE.inject({}) do |hash, content|
+ key, expression = content
+ hash[key] = /(?:\s|^)#{expression}(\d*)(?:\s|$)/u
+ hash
+ end
+ CURRENCIES_COMPLEX = CURRENCIES_SUPPORTED_SIMPLE.inject({}) do |hash, content|
+ key, expression = content
+ # Do we really need to not worry about complex currencies if there are none for the currency?
+ complex_key = CURRENCIES_SUPPORTED_COMPLEX[key]
+ if complex_key
+ hash[complex_key] = /(?:\s|^)#{expression}(\d+)\.(\d+)(?:\s|$)/u
+ end
+ hash
+ end
+ CURRENCIES = CURRENCIES_SIMPLE.merge(CURRENCIES_COMPLEX)
+
+ HTML_ENTITIES = Proc.new(){
+ base = {
+ :amp => %w{#38 amp},
+ :cent => %w{#162 cent},
+ :copy => %w{#169 copy},
+ :deg => %w{#176 deg},
+ :divide => %w{#247 divide},
+ :double_quote => %w{#34 #822[012] quot ldquo rdquo dbquo},
+ :ellipsis => %w{#8230 hellip},
+ :en_dash => %w{#8211 ndash},
+ :em_dash => %w{#8212 emdash},
+ :frac14 => %w{#188 frac14},
+ :frac12 => %w{#189 frac12},
+ :frac34 => %w{#190 frac34},
+ :gt => %w{#62 gt},
+ :lt => %w{#60 lt},
+ :nbsp => %w{#160 nbsp},
+ :pound => %w{#163 pound},
+ :reg => %w{#174 reg},
+ :single_quote => %w{#39 #821[678] apos lsquo rsquo sbquo},
+ :times => %w{#215 times},
+ :trade => %w{#8482 trade},
+ :yen => %w{#165 yen},
+ }
+ base.inject({}) do |hash, content|
+ key, expression = content
+ hash[key] = /&(#{expression.join('|')});/
+ hash
+ end
+ }.call
+
+ HTML_TAG = Proc.new(){
+ name = /[\w:_-]+/
+ value = /([A-Za-z0-9]+|('[^']*?'|"[^"]*?"))/
+ attr = /(#{name}(\s*=\s*#{value})?)/
+ /<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/
+ }.call
+
+ SMART_PUNCTUATION = {
+ /(“|”|\302\223|\302\224|\303\222|\303\223)/ => '"',
+ /(‘|’|\302\221|\302\222|\303\225)/ => "'",
+ // => "...",
+ }
+
+ # Ordered by denominator then numerator of the value
+ VULGAR_FRACTIONS = {
+ :half => /(&#189;|&frac12;|½)/,
+ :one_third => /(&#8531;|⅓)/,
+ :two_thirds => /(&#8532;|⅔)/,
+ :one_fourth => /(&#188;|&frac14;|¼)/,
+ :three_fourths => /(&#190;|&frac34;|¾)/,
+ :one_fifth => /(&#8533;|⅕)/,
+ :two_fifths => /(&#8534;|⅖)/,
+ :three_fifths => /(&#8535;|⅗)/,
+ :four_fifths => /(&#8536;|⅘)/,
+ :one_sixth => /(&#8537;|⅙)/,
+ :five_sixths => /(&#8538;|⅚)/,
+ :one_eighth => /(&#8539;|⅛)/,
+ :three_eighths => /(&#8540;|⅜)/,
+ :five_eighths => /(&#8541;|⅝)/,
+ :seven_eighths => /(&#8542;|⅞)/,
+ }
+
+ WHITESPACE = /\s+/
+
+ class << self
+ %w{
+ abbreviation
+ accented_html_entity
+ apostrophe
+ characters
+ cleanup_characters
+ cleanup_html_entities
+ currencies
+ currencies_simple
+ currencies_complex
+ html_entities
+ html_tag
+ smart_punctuation
+ vulgar_fractions
+ whitespace
+ }.each do |conversion_type|
+ define_method conversion_type do
+ const_get conversion_type.upcase
+ end
+ end
+ end
+ end
+ end
+end
@@ -0,0 +1,120 @@
+# encoding: UTF-8
+
+require 'stringex/localization/conversion_expressions'
+
+module Stringex
+ module Localization
+ class Converter
+ include ConversionExpressions
+
+ attr_reader :ending_whitespace, :options, :starting_whitespace, :string
+
+ def initialize(string, options = {})
+ @string = string.dup
+ @options = Stringex::Configuration::StringExtensions.default_settings.merge(options)
+ string =~ /^(\s+)/
+ @starting_whitespace = $1 unless $1 == ''
+ string =~ /(\s+)$/
+ @ending_whitespace = $1 unless $1 == ''
+ end
+
+ def cleanup_accented_html_entities!
+ string.gsub! expressions.accented_html_entity, '\1'
+ end
+
+ def cleanup_characters!
+ string.gsub! expressions.cleanup_characters, ' '
+ end
+
+ def cleanup_html_entities!
+ string.gsub! expressions.cleanup_html_entities, ''
+ end
+
+ def cleanup_smart_punctuation!
+ expressions.smart_punctuation.each do |expression, replacement|
+ string.gsub! expression, replacement
+ end
+ end
+
+ def smart_strip!
+ string.strip!
+ @string = "#{starting_whitespace}#{string}#{ending_whitespace}"
+ end
+
+ def strip!
+ string.strip!
+ end
+
+ def strip_html_tags!
+ string.gsub! expressions.html_tag, ''
+ end
+
+ def translate!(*conversions)
+ conversions.each do |conversion|
+ send conversion
+ end
+ end
+
+ protected
+
+ def abbreviations
+ string.gsub! expressions.abbreviation do |x|
+ x.gsub '.', ''
+ end
+ end
+
+ def apostrophes
+ string.gsub! expressions.apostrophe, '\1\2'
+ end
+
+ def characters
+ expressions.characters.each do |key, expression|
+ next if key == :slash && options[:allow_slash]
+ replacement = translate(key)
+ replacement = " #{replacement} " unless key == :dot
+ string.gsub! expression, replacement
+ end
+ end
+
+ def currencies
+ if has_currencies?
+ [:currencies_complex, :currencies_simple].each do |type|
+ expressions.send(type).each do |key, expression|
+ string.gsub! expression, " #{translate(key, :currencies)} "
+ end
+ end
+ end
+ end
+
+ def ellipses
+ string.gsub! expressions.characters[:ellipsis], " #{translate(:ellipsis)} "
+ end
+
+ def html_entities
+ expressions.html_entities.each do |key, expression|
+ string.gsub! expression, translate(key, :html_entities)
+ end
+ end
+
+ def vulgar_fractions
+ expressions.vulgar_fractions.each do |key, expression|
+ string.gsub! expression, translate(key, :vulgar_fractions)
+ end
+ end
+
+ private
+
+ def expressions
+ ConversionExpressions
+ end
+
+ def has_currencies?
+ string =~ CURRENCIES_SUPPORTED
+ end
+
+ def translate(key, scope = :characters)
+ Localization.translate scope, key
+ end
+ end
+ end
+end
@@ -18,25 +18,26 @@ module DefaultConversions
:star => "star",
}
- CURRENCIES = {
+ CURRENCIES_SIMPLE = {
:dollars => '\1 dollars',
- :dollars_cents => '\1 dollars \2 cents',
:euros => '\1 euros',
- :euros_cents => '\1 euros \2 cents',
:pounds => '\1 pounds',
- :pounds_pence => '\1 pounds \2 pence',
:yen => '\1 yen',
}
-
- CURRENCIES_SUPPORTED = %w{$ £ € ¥}
+ CURRENCIES_COMPLEX = {
+ :dollars_cents => '\1 dollars \2 cents',
+ :euros_cents => '\1 euros \2 cents',
+ :pounds_pence => '\1 pounds \2 pence',
+ }
+ CURRENCIES = CURRENCIES_SIMPLE.merge(CURRENCIES_COMPLEX)
HTML_ENTITIES = {
:amp => "and",
:cent => " cents",
:copy => "(c)",
:deg => " degrees ",
:divide => "divide",
- :double_quote => "\"",
+ :double_quote => '"',
:ellipsis => "...",
:en_dash => "-",
:em_dash => "--",
@@ -46,21 +47,23 @@ module DefaultConversions
:gt => ">",
:lt => "<",
:nbsp => " ",
- :pound => " pound",
+ :pound => " pounds",
:reg => "(r)",
:single_quote => "'",
:times => "x",
:trade => "(tm)",
+ :yen => " yen"
}
TRANSLITERATIONS = {}
+ # Ordered by denominator then numerator of the value
VULGAR_FRACTIONS = {
- :one_fourth => "one fourth",
:half => "half",
- :three_fourths => "three fourths",
:one_third => "one third",
:two_thirds => "two thirds",
+ :one_fourth => "one fourth",
+ :three_fourths => "three fourths",
:one_fifth => "one fifth",
:two_fifths => "two fifths",
:three_fifths => "three fifths",
Oops, something went wrong.

0 comments on commit 430efc1

Please sign in to comment.