Permalink
Browse files

Move Parser and Reader to the dedicated MultipartParser module

  • Loading branch information...
1 parent 095dfcc commit 34342e541008479f9e03cf8c900950b32141723b @lunar-debian lunar-debian committed Mar 12, 2012
View
@@ -1,244 +0,0 @@
-# A low level parser for multipart messages,
-# based on the node-formidable parser.
-class MultipartParser
-
- def initialize
- @boundary = nil
- @boundary_chars = nil
- @lookbehind = nil
- @state = :parser_uninitialized
- @index = 0 # Index into boundary or header
- @flags = {}
- @marks = {} # Keep track of different parts
- @callbacks = {}
- end
-
- # Initializes the parser, using the given boundary
- def init_with_boundary(boundary)
- @boundary = "\r\n--" + boundary
- @lookbehind = "\0"*(@boundary.length + 8)
- @state = :start
-
- @boundary_chars = {}
- @boundary.each_byte do |b|
- @boundary_chars[b] = true
- end
- end
-
- # Registers a callback to be called when the
- # given event occurs. Each callback is expected to
- # take three parameters: buffer, start_index, and end_index.
- # All of these parameters may be null, depending on the callback.
- # Valid callbacks are:
- # :end
- # :header_field
- # :header_value
- # :header_end
- # :headers_end
- # :part_begin
- # :part_data
- # :part_end
- def on(event, &callback)
- @callbacks[event] = callback
- end
-
- # Writes data to the parser.
- # Returns the number of bytes parsed.
- # In practise, this means that if the return value
- # is less than the buffer length, a parse error occured.
- def write(buffer)
- i = 0
- buffer_length = buffer.length
- index = @index
- flags = @flags.dup
- state = @state
- lookbehind = @lookbehind
- boundary = @boundary
- boundary_chars = @boundary_chars
- boundary_length = @boundary.length
- boundary_end = boundary_length - 1
-
- while i < buffer_length
- c = buffer[i, 1]
- case state
- when :parser_uninitialized
- return i;
- when :start
- index = 0;
- state = :start_boundary
- when :start_boundary # Differs in that it has no preceeding \r\n
- if index == boundary_length - 2
- return i unless c == "\r"
- index += 1
- elsif index - 1 == boundary_length - 2
- return i unless c == "\n"
- # Boundary read successfully, begin next part
- callback(:part_begin)
- state = :header_field_start
- else
- return i unless c == boundary[index+2, 1] # Unexpected character
- index += 1
- end
- i += 1
- when :header_field_start
- state = :header_field
- @marks[:header_field] = i
- index = 0
- when :header_field
- if c == "\r"
- @marks.delete :header_field
- state = :headers_almost_done
- else
- index += 1
- unless c == "-" # Skip hyphens
- if c == ":"
- return i if index == 1 # Empty header field
- data_callback(:header_field, buffer, i, :clear => true)
- state = :header_value_start
- else
- cl = c.downcase
- return i if cl < "a" || cl > "z"
- end
- end
- end
- i += 1
- when :header_value_start
- if c == " " # Skip spaces
- i += 1
- else
- @marks[:header_value] = i
- state = :header_value
- end
- when :header_value
- if c == "\r"
- data_callback(:header_value, buffer, i, :clear => true)
- callback(:header_end)
- state = :header_value_almost_done
- end
- i += 1
- when :header_value_almost_done
- return i unless c == "\n"
- state = :header_field_start
- i += 1
- when :headers_almost_done
- return i unless c == "\n"
- callback(:headers_end)
- state = :part_data_start
- i += 1
- when :part_data_start
- state = :part_data
- @marks[:part_data] = i
- when :part_data
- prev_index = index
-
- if index == 0
- # Boyer-Moore derived algorithm to safely skip non-boundary data
- # See http://debuggable.com/posts/parsing-file-uploads-at-500-
- # mb-s-with-node-js:4c03862e-351c-4faa-bb67-4365cbdd56cb
- while i + boundary_length <= buffer_length
- break if boundary_chars.has_key? buffer[i + boundary_end]
- i += boundary_length
- end
- c = buffer[i, 1]
- end
-
- if index < boundary_length
- if boundary[index, 1] == c
- if index == 0
- data_callback(:part_data, buffer, i, :clear => true)
- end
- index += 1
- else # It was not the boundary we found, after all
- index = 0
- end
- elsif index == boundary_length
- index += 1
- if c == "\r"
- flags[:part_boundary] = true
- elsif c == "-"
- flags[:last_boundary] = true
- else # We did not find a boundary after all
- index = 0
- end
- elsif index - 1 == boundary_length
- if flags[:part_boundary]
- index = 0
- if c == "\n"
- flags.delete :part_boundary
- callback(:part_end)
- callback(:part_begin)
- state = :header_field_start
- i += 1
- next # Ugly way to break out of the case statement
- end
- elsif flags[:last_boundary]
- if c == "-"
- callback(:part_end)
- callback(:end)
- state = :end
- else
- index = 0 # False alarm
- end
- else
- index = 0
- end
- end
-
- if index > 0
- # When matching a possible boundary, keep a lookbehind
- # reference in case it turns out to be a false lead
- lookbehind[index-1] = c
- elsif prev_index > 0
- # If our boundary turns out to be rubbish,
- # the captured lookbehind belongs to part_data
- callback(:part_data, lookbehind, 0, prev_index)
- @marks[:part_data] = i
-
- # Reconsider the current character as it might be the
- # beginning of a new sequence.
- i -= 1
- end
-
- i += 1
- when :end
- i += 1
- else
- return i;
- end
- end
-
- data_callback(:header_field, buffer, buffer_length)
- data_callback(:header_value, buffer, buffer_length)
- data_callback(:part_data, buffer, buffer_length)
-
- @index = index
- @state = state
- @flags = flags
-
- return buffer_length
- end
-
- private
-
- # Issues a callback.
- def callback(event, buffer = nil, start = nil, the_end = nil)
- return if !start.nil? && start == the_end
- if @callbacks.has_key? event
- @callbacks[event].call(buffer, start, the_end)
- end
- end
-
- # Issues a data callback,
- # The only valid options is :clear,
- # which, if true, will reset the appropriate mark to 0,
- # If not specified, the mark will be removed.
- def data_callback(data_type, buffer, the_end, options = {})
- return unless @marks.has_key? data_type
- callback(data_type, buffer, @marks[data_type], the_end)
- unless options[:clear]
- @marks[data_type] = 0
- else
- @marks.delete data_type
- end
- end
-end
Oops, something went wrong.

0 comments on commit 34342e5

Please sign in to comment.