Permalink
Browse files

Replace Treetop parser with a Ragel based parser

  • Loading branch information...
bpot committed Jan 8, 2013
1 parent ac02a2e commit 2da7c7985c221272f6451b27ab8b41e84e0a6804
Showing with 32,791 additions and 13,400 deletions.
  1. +0 −1 Gemfile
  2. +0 −35 lib/load_parsers.rb
  3. +1 −1 lib/mail.rb
  4. +38 −82 lib/mail/elements/address.rb
  5. +19 −42 lib/mail/elements/address_list.rb
  6. +3 −7 lib/mail/elements/content_disposition_element.rb
  7. +2 −6 lib/mail/elements/content_location_element.rb
  8. +3 −10 lib/mail/elements/content_transfer_encoding_element.rb
  9. +4 −8 lib/mail/elements/content_type_element.rb
  10. +3 −7 lib/mail/elements/date_time_element.rb
  11. +3 −11 lib/mail/elements/envelope_from_element.rb
  12. +1 −6 lib/mail/elements/message_ids_element.rb
  13. +3 −7 lib/mail/elements/mime_version_element.rb
  14. +2 −7 lib/mail/elements/phrase_list.rb
  15. +3 −7 lib/mail/elements/received_element.rb
  16. +0 −5 lib/mail/envelope.rb
  17. +15 −20 lib/mail/fields/common/common_address.rb
  18. +0 −7 lib/mail/fields/common/common_date.rb
  19. +0 −6 lib/mail/fields/content_transfer_encoding_field.rb
  20. +1 −1 lib/mail/fields/resent_sender_field.rb
  21. +1 −1 lib/mail/fields/sender_field.rb
  22. +26 −0 lib/mail/parsers.rb
  23. +0 −64 lib/mail/parsers/address_lists.rb
  24. +0 −19 lib/mail/parsers/address_lists.treetop
  25. +128 −0 lib/mail/parsers/address_lists_parser.rb
  26. +0 −535 lib/mail/parsers/content_disposition.rb
  27. +0 −46 lib/mail/parsers/content_disposition.treetop
  28. +67 −0 lib/mail/parsers/content_disposition_parser.rb
  29. +0 −139 lib/mail/parsers/content_location.rb
  30. +0 −20 lib/mail/parsers/content_location.treetop
  31. +35 −0 lib/mail/parsers/content_location_parser.rb
  32. +0 −201 lib/mail/parsers/content_transfer_encoding.rb
  33. +0 −18 lib/mail/parsers/content_transfer_encoding.treetop
  34. +33 −0 lib/mail/parsers/content_transfer_encoding_parser.rb
  35. +0 −971 lib/mail/parsers/content_type.rb
  36. +0 −68 lib/mail/parsers/content_type.treetop
  37. +64 −0 lib/mail/parsers/content_type_parser.rb
  38. +0 −114 lib/mail/parsers/date_time.rb
  39. +0 −11 lib/mail/parsers/date_time.treetop
  40. +36 −0 lib/mail/parsers/date_time_parser.rb
  41. +0 −194 lib/mail/parsers/envelope_from.rb
  42. +0 −32 lib/mail/parsers/envelope_from.treetop
  43. +45 −0 lib/mail/parsers/envelope_from_parser.rb
  44. +0 −45 lib/mail/parsers/message_ids.rb
  45. +0 −15 lib/mail/parsers/message_ids.treetop
  46. +39 −0 lib/mail/parsers/message_ids_parser.rb
  47. +0 −144 lib/mail/parsers/mime_version.rb
  48. +0 −19 lib/mail/parsers/mime_version.treetop
  49. +41 −0 lib/mail/parsers/mime_version_parser.rb
  50. +0 −45 lib/mail/parsers/phrase_lists.rb
  51. +0 −15 lib/mail/parsers/phrase_lists.treetop
  52. +33 −0 lib/mail/parsers/phrase_lists_parser.rb
  53. +17 −0 lib/mail/parsers/ragel.rb
  54. +184 −0 lib/mail/parsers/ragel/common.rl
  55. +30 −0 lib/mail/parsers/ragel/date_time.rl
  56. +61 −0 lib/mail/parsers/ragel/parser_info.rb
  57. +29 −0 lib/mail/parsers/ragel/ruby.rb
  58. +14,857 −0 lib/mail/parsers/ragel/ruby/machines/address_lists_machine.rb
  59. +37 −0 lib/mail/parsers/ragel/ruby/machines/address_lists_machine.rb.rl
  60. +751 −0 lib/mail/parsers/ragel/ruby/machines/content_disposition_machine.rb
  61. +37 −0 lib/mail/parsers/ragel/ruby/machines/content_disposition_machine.rb.rl
  62. +614 −0 lib/mail/parsers/ragel/ruby/machines/content_location_machine.rb
  63. +37 −0 lib/mail/parsers/ragel/ruby/machines/content_location_machine.rb.rl
  64. +447 −0 lib/mail/parsers/ragel/ruby/machines/content_transfer_encoding_machine.rb
  65. +37 −0 lib/mail/parsers/ragel/ruby/machines/content_transfer_encoding_machine.rb.rl
  66. +825 −0 lib/mail/parsers/ragel/ruby/machines/content_type_machine.rb
  67. +37 −0 lib/mail/parsers/ragel/ruby/machines/content_type_machine.rb.rl
  68. +817 −0 lib/mail/parsers/ragel/ruby/machines/date_time_machine.rb
  69. +37 −0 lib/mail/parsers/ragel/ruby/machines/date_time_machine.rb.rl
  70. +2,118 −0 lib/mail/parsers/ragel/ruby/machines/envelope_from_machine.rb
  71. +37 −0 lib/mail/parsers/ragel/ruby/machines/envelope_from_machine.rb.rl
  72. +1,563 −0 lib/mail/parsers/ragel/ruby/machines/message_ids_machine.rb
  73. +37 −0 lib/mail/parsers/ragel/ruby/machines/message_ids_machine.rb.rl
  74. +440 −0 lib/mail/parsers/ragel/ruby/machines/mime_version_machine.rb
  75. +37 −0 lib/mail/parsers/ragel/ruby/machines/mime_version_machine.rb.rl
  76. +564 −0 lib/mail/parsers/ragel/ruby/machines/phrase_lists_machine.rb
  77. +37 −0 lib/mail/parsers/ragel/ruby/machines/phrase_lists_machine.rb.rl
  78. +51 −0 lib/mail/parsers/ragel/ruby/machines/rb_actions.rl
  79. +5,002 −0 lib/mail/parsers/ragel/ruby/machines/received_machine.rb
  80. +37 −0 lib/mail/parsers/ragel/ruby/machines/received_machine.rb.rl
  81. +37 −0 lib/mail/parsers/ragel/ruby/parser.rb.rl.erb
  82. +0 −71 lib/mail/parsers/received.rb
  83. +0 −11 lib/mail/parsers/received.treetop
  84. +46 −0 lib/mail/parsers/received_parser.rb
  85. +0 −421 lib/mail/parsers/rfc2045.rb
  86. +0 −35 lib/mail/parsers/rfc2045.treetop
  87. +0 −5,397 lib/mail/parsers/rfc2822.rb
  88. +0 −408 lib/mail/parsers/rfc2822.treetop
  89. +0 −3,768 lib/mail/parsers/rfc2822_obsolete.rb
  90. +0 −241 lib/mail/parsers/rfc2822_obsolete.treetop
  91. +53 −0 lib/tasks/ragel.rake
  92. +0 −10 lib/tasks/treetop.rake
  93. +0 −1 mail.gemspec
  94. +3,195 −0 reference/rfc5322 Internet Message Format.txt
  95. +2 −22 spec/mail/elements/address_list_spec.rb
  96. +7 −0 spec/mail/elements/address_spec.rb
  97. +1 −0 spec/mail/example_emails_spec.rb
  98. +1 −0 spec/mail/fields/content_type_field_spec.rb
  99. +2 −2 spec/mail/parsers/address_lists_parser_spec.rb
  100. +21 −21 spec/mail/parsers/content_transfer_encoding_parser_spec.rb
View
@@ -2,7 +2,6 @@ source 'https://rubygems.org'
gemspec
-gem "treetop", "~> 1.4.10"
gem "mime-types", "~> 1.16"
gem "tlsmail" if RUBY_VERSION <= '1.8.6'
View
@@ -1,35 +0,0 @@
-# encoding: utf-8
-# This file loads up the parsers for mail to use. It also will attempt to compile parsers
-# if they don't exist.
-#
-# It also only uses the compiler if we are running the SPEC suite
-module Mail # :doc:
- require 'treetop/runtime'
-
- def self.compile_parser(parser)
- require 'treetop/compiler'
- Treetop.load(File.join(File.dirname(__FILE__)) + "/mail/parsers/#{parser}")
- end
-
- parsers = %w[ rfc2822_obsolete rfc2822 address_lists phrase_lists
- date_time received message_ids envelope_from rfc2045
- mime_version content_type content_disposition
- content_transfer_encoding content_location ]
-
- if defined?(MAIL_SPEC_SUITE_RUNNING)
- parsers.each do |parser|
- compile_parser(parser)
- end
-
- else
- parsers.each do |parser|
- begin
- require "mail/parsers/#{parser}"
- rescue LoadError
- compile_parser(parser)
- end
- end
-
- end
-
-end
View
@@ -76,7 +76,7 @@ def self.eager_autoload!
require 'mail/envelope'
- require 'load_parsers'
+ require 'mail/parsers'
# Autoload header field elements and transfer encodings.
require 'mail/elements'
@@ -1,7 +1,7 @@
# encoding: utf-8
module Mail
class Address
-
+
include Mail::Utilities
# Mail::Address handles all email addresses in Mail. It takes an email address string
@@ -22,21 +22,19 @@ class Address
# a.to_s #=> 'Mikel Lindsaar <mikel@test.lindsaar.net> (My email address)'
def initialize(value = nil)
@output_type = :decode
- @tree = nil
- @raw_text = value
- case
- when value.nil?
+ if value.nil?
@parsed = false
+ @data = nil
return
else
parse(value)
end
end
- # Returns the raw imput of the passed in string, this is before it is passed
+ # Returns the raw input of the passed in string, this is before it is passed
# by the parser.
def raw
- @raw_text
+ @data.raw
end
# Returns a correctly formatted address for the email going out. If given
@@ -48,15 +46,14 @@ def raw
# a.format #=> 'Mikel Lindsaar <mikel@test.lindsaar.net> (My email address)'
def format
parse unless @parsed
- case
- when tree.nil?
+ if @data.nil?
''
- when display_name
+ elsif display_name
[quote_phrase(display_name), "<#{address}>", format_comments].compact.join(" ")
- when address
+ elsif address
[address, format_comments].compact.join(" ")
else
- tree.text_value
+ raw
end
end
@@ -106,7 +103,7 @@ def display_name=( str )
# a.local #=> 'mikel'
def local
parse unless @parsed
- "#{obs_domain_list}#{get_local.strip}" if get_local
+ "#{@data.obs_domain_list}#{get_local.strip}" if get_local
end
# Returns the domain part (the right hand side of the @ sign in the email address) of
@@ -174,29 +171,24 @@ def decoded
def parse(value = nil)
@parsed = true
- case
- when value.nil?
+
+ case value
+ when NilClass
+ @data = nil
nil
- when value.class == String
- self.tree = Mail::AddressList.new(value).address_nodes.first
- else
- self.tree = value
+ when Mail::Parsers::AddressStruct
+ @data = value
+ when String
+ @raw_text = value
+ if value.blank?
+ @data = nil
+ else
+ address_list = Mail::Parsers::AddressListsParser.new.parse(value)
+ @data = address_list.addresses.first
+ end
end
end
-
- def get_domain
- if tree.respond_to?(:angle_addr) && tree.angle_addr.respond_to?(:addr_spec) && tree.angle_addr.addr_spec.respond_to?(:domain)
- @domain_text ||= tree.angle_addr.addr_spec.domain.text_value.strip
- elsif tree.respond_to?(:domain)
- @domain_text ||= tree.domain.text_value.strip
- elsif tree.respond_to?(:addr_spec) && tree.addr_spec.respond_to?(:domain)
- tree.addr_spec.domain.text_value.strip
- else
- nil
- end
- end
-
def strip_all_comments(string)
unless comments.blank?
comments.each do |comment|
@@ -209,28 +201,19 @@ def strip_all_comments(string)
def strip_domain_comments(value)
unless comments.blank?
comments.each do |comment|
- if get_domain && get_domain.include?("(#{comment})")
+ if @data.domain && @data.domain.include?("(#{comment})")
value = value.gsub("(#{comment})", '')
end
end
end
value.to_s.strip
end
- def get_comments
- if tree.respond_to?(:comments)
- @comments = tree.comments.map { |c| unparen(c.text_value.to_str) }
- else
- @comments = []
- end
- end
-
def get_display_name
- if tree.respond_to?(:display_name)
- name = unquote(tree.display_name.text_value.strip)
- str = strip_all_comments(name.to_s)
- elsif comments
- if domain
+ if @data.display_name
+ str = strip_all_comments(@data.display_name.to_s)
+ elsif @data.comments
+ if @data.domain
str = strip_domain_comments(format_comments)
else
str = nil
@@ -263,15 +246,6 @@ def get_name
end
end
- # Provides access to the Treetop parse tree for this address
- def tree
- @tree
- end
-
- def tree=(value)
- @tree = value
- end
-
def format_comments
if comments
comment_text = comments.map {|c| escape_paren(c) }.join(' ').squeeze(" ")
@@ -280,35 +254,17 @@ def format_comments
nil
end
end
-
- def obs_domain_list
- if tree.respond_to?(:angle_addr)
- obs = tree.angle_addr.elements.select { |e| e.respond_to?(:obs_domain_list) }
- !obs.empty? ? obs.first.text_value : nil
- else
- nil
- end
- end
-
+
def get_local
- case
- when tree.respond_to?(:local_dot_atom_text)
- tree.local_dot_atom_text.text_value
- when tree.respond_to?(:angle_addr) && tree.angle_addr.respond_to?(:addr_spec) && tree.angle_addr.addr_spec.respond_to?(:local_part)
- tree.angle_addr.addr_spec.local_part.text_value
- when tree.respond_to?(:addr_spec) && tree.addr_spec.respond_to?(:local_part)
- tree.addr_spec.local_part.text_value
- when tree.respond_to?(:angle_addr) && tree.angle_addr.respond_to?(:addr_spec) && tree.angle_addr.addr_spec.respond_to?(:local_dot_atom_text)
- # Ignore local dot atom text when in angle brackets
- nil
- when tree.respond_to?(:addr_spec) && tree.addr_spec.respond_to?(:local_dot_atom_text)
- # Ignore local dot atom text when in angle brackets
- nil
- else
- tree && tree.respond_to?(:local_part) ? tree.local_part.text_value : nil
- end
+ @data && @data.local
+ end
+
+ def get_domain
+ @data && @data.domain
end
-
+ def get_comments
+ @data && @data.comments
+ end
end
end
@@ -1,7 +1,7 @@
# encoding: utf-8
module Mail
class AddressList # :nodoc:
-
+
# Mail::AddressList is the class that parses To, From and other address fields from
# emails passed into Mail.
#
@@ -18,57 +18,34 @@ class AddressList # :nodoc:
# a.addresses #=> [#<Mail::Address:14943130 Address: |ada@test.lindsaar.net...
# a.group_names #=> ["My Group"]
def initialize(string)
- if string.blank?
- @address_nodes = []
- return self
- end
- parser = Mail::AddressListsParser.new
- if tree = parser.parse(string)
- @address_nodes = tree.addresses
- else
- raise Mail::Field::ParseError.new(AddressListsParser, string, parser.failure_reason)
- end
+ @addresses_grouped_by_group = nil
+ @address_list = Parsers::AddressListsParser.new.parse(string)
end
# Returns a list of address objects from the parsed line
def addresses
- @addresses ||= get_addresses.map do |address_tree|
- Mail::Address.new(address_tree)
+ @addresses ||= @address_list.addresses.map do |address_data|
+ Mail::Address.new(address_data)
end
end
-
- # Returns a list of all recipient syntax trees that are not part of a group
- def individual_recipients # :nodoc:
- @individual_recipients ||= @address_nodes - group_recipients
- end
-
- # Returns a list of all recipient syntax trees that are part of a group
- def group_recipients # :nodoc:
- @group_recipients ||= @address_nodes.select { |an| an.respond_to?(:group_name) }
+
+ def addresses_grouped_by_group
+ return @addresses_grouped_by_group if @addresses_grouped_by_group
+
+ @addresses_grouped_by_group = {}
+
+ @address_list.addresses.each do |address_data|
+ if group = address_data.group
+ @addresses_grouped_by_group[group] ||= []
+ @addresses_grouped_by_group[group] << Mail::Address.new(address_data)
+ end
+ end
+ @addresses_grouped_by_group
end
# Returns the names as an array of strings of all groups
def group_names # :nodoc:
- group_recipients.map { |g| g.group_name.text_value }
- end
-
- # Returns a list of address syntax trees
- def address_nodes # :nodoc:
- @address_nodes
- end
-
- private
-
- def get_addresses
- (individual_recipients + group_recipients.map { |g| get_group_addresses(g) }).flatten
- end
-
- def get_group_addresses(g)
- if g.group_list.respond_to?(:addresses)
- g.group_list.addresses
- else
- []
- end
+ @address_list.group_names
end
end
end
@@ -5,13 +5,9 @@ class ContentDispositionElement # :nodoc:
include Mail::Utilities
def initialize( string )
- parser = Mail::ContentDispositionParser.new
- if tree = parser.parse(cleaned(string))
- @disposition_type = tree.disposition_type.text_value.downcase
- @parameters = tree.parameters
- else
- raise Mail::Field::ParseError.new(ContentDispositionElement, string, parser.failure_reason)
- end
+ content_disposition = Mail::Parsers::ContentDispositionParser.new.parse(cleaned(string))
+ @disposition_type = content_disposition.disposition_type
+ @parameters = content_disposition.parameters
end
def disposition_type
@@ -5,12 +5,8 @@ class ContentLocationElement # :nodoc:
include Mail::Utilities
def initialize( string )
- parser = Mail::ContentLocationParser.new
- if tree = parser.parse(string)
- @location = tree.location.text_value
- else
- raise Mail::Field::ParseError.new(ContentLocationElement, string, parser.failure_reason)
- end
+ content_location = Mail::Parsers::ContentLocationParser.new.parse(string)
+ @location = content_location.location
end
def location
@@ -4,16 +4,9 @@ class ContentTransferEncodingElement
include Mail::Utilities
- def initialize( string )
- parser = Mail::ContentTransferEncodingParser.new
- case
- when string.blank?
- @encoding = ''
- when tree = parser.parse(string.to_s.downcase)
- @encoding = tree.encoding.text_value
- else
- raise Mail::Field::ParseError.new(ContentTransferEncodingElement, string, parser.failure_reason)
- end
+ def initialize(string)
+ content_transfer_encoding = Mail::Parsers::ContentTransferEncodingParser.new.parse(string)
+ @encoding = content_transfer_encoding.encoding
end
def encoding
Oops, something went wrong.

0 comments on commit 2da7c79

Please sign in to comment.