Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge branch 'gzip' of github.com:martoche/em-http-request

  • Loading branch information...
commit ee337a58c120352effe49494b3224cca284acaed 2 parents 25c2ada + 6270932
@igrigorik igrigorik authored
View
167 lib/em-http/decoders.rb
@@ -91,52 +91,161 @@ def finalize
end
end
- class GZip < Base
- def self.encoding_names
- %w(gzip compressed)
##
# Partial implementation of RFC 1952 to extract the deflate stream from a gzip file.
#
# The header is parsed incrementally: feed chunks to #extract_stream as they
# arrive. While the header is incomplete it returns an empty string; once the
# header has been fully skipped it returns the bytes that follow it.
class GZipHeader
  # Bits of the FLG header byte (RFC 1952, section 2.3.1).
  FHCRC          = 0x02
  FEXTRA         = 0x04
  FNAME          = 0x08
  FCOMMENT       = 0x10
  RESERVED_FLAGS = 0xe0

  def initialize
    @state = :begin
    # String.new yields a binary (ASCII-8BIT) string on Ruby >= 1.9, so that
    # size/[] below count bytes, consistently with getbyte.
    @data = String.new
    @pos = 0
  end

  # True once the whole header has been consumed.
  def finished?
    @state == :finish
  end

  # Appends the next +n+ buffered bytes to +buffer+ and advances the cursor.
  # Returns false (consuming nothing) when fewer than +n+ bytes are buffered.
  def read(n, buffer)
    return false if (@pos + n) > @data.size

    buffer << @data[@pos, n]
    @pos += n
    true
  end

  # Returns the next buffered byte as an Integer, or nil when none is left.
  def readbyte
    return nil if eof?

    byte = @data.getbyte(@pos)
    @pos += 1
    byte
  end

  # True when every buffered byte has been consumed.
  def eof?
    @pos >= @data.size
  end

  # Feeds one more chunk of the gzip file to the header parser.
  #
  # Returns "" while the header is still incomplete. On the call that
  # completes the header it returns every buffered byte that follows the
  # header (including bytes left over from earlier chunks); on later calls it
  # returns the chunk unchanged.
  #
  # Raises DecoderError when the magic number, the compression method or the
  # flag byte is not what this parser supports.
  def extract_stream(compressed)
    @data << binary(compressed)

    while !eof? && !finished?
      buffer = String.new

      case @state
      when :begin
        # Fixed 10-byte part: magic (2), method (1), flags (1), mtime (4),
        # extra flags (1), OS (1).
        break unless read(10, buffer)

        if buffer.getbyte(0) != 0x1f || buffer.getbyte(1) != 0x8b
          raise DecoderError, "magic header not found"
        end

        if buffer.getbyte(2) != 0x08
          raise DecoderError, "unknown compression method"
        end

        @flags = buffer.getbyte(3)
        if (@flags & RESERVED_FLAGS).nonzero?
          raise DecoderError, "unknown header flags set"
        end

        # We don't care about these values, I'm leaving the code for reference
        # @time = buffer[4..7].unpack("V")[0] # little-endian uint32
        # @extra_flags = buffer.getbyte(8)
        # @os = buffer.getbyte(9)

        @state = :extra_length

      when :extra_length
        if (@flags & FEXTRA).nonzero?
          break unless read(2, buffer)
          @extra_length = buffer.unpack("v")[0] # little-endian uint16
        end
        @state = :extra

      when :extra
        # Wait for the whole extra field before skipping it.
        break if (@flags & FEXTRA).nonzero? && !read(@extra_length, buffer)
        @state = :name

      when :name
        @state = :comment if (@flags & FNAME).zero? || skip_zero_terminated

      when :comment
        @state = :hcrc if (@flags & FCOMMENT).zero? || skip_zero_terminated

      when :hcrc
        break if (@flags & FHCRC).nonzero? && !read(2, buffer)
        @state = :finish
      end
    end

    return "" unless finished?

    # Everything still buffered after the header belongs to the compressed
    # stream, whichever chunk delivered it. Release the buffer afterwards so
    # nothing is handed out twice.
    stream = @data[@pos..-1]
    @data = String.new
    @pos = 0
    stream
  end

  private

  # Binary-tagged copy of +str+, leaving the caller's string untouched.
  # Strings without encoding support (Ruby 1.8) are returned as-is.
  def binary(str)
    return str unless str.respond_to?(:force_encoding)

    str.dup.force_encoding("BINARY")
  end

  # Consumes buffered bytes up to and including a zero byte. Returns true
  # when the terminator was found, false when the buffer ran out first.
  def skip_zero_terminated
    while (byte = readbyte)
      return true if byte.zero?
    end
    false
  end
end
- def read(length=nil, buffer=nil)
- buffer ||= ""
- length ||= 0
- buffer << @stream[0..(length-1)]
- @stream = @stream[length..-1]
- buffer
##
# Streaming decoder for gzip content: the gzip header is skipped with
# GZipHeader and the bytes after it are fed to a Zlib::Inflate stream.
class GZip < Base
  def self.encoding_names
    %w(gzip compressed)
  end

  # Inflates one chunk of gzip data and returns whatever output is available.
  # Until the gzip header is complete the inflater is fed an empty string.
  #
  # Raises DecoderError on any Zlib::Error.
  def decompress(compressed)
    @header ||= GZipHeader.new
    compressed = @header.extract_stream(compressed) unless @header.finished?

    # Negative window bits: per the zlib manual this selects a raw deflate
    # stream, which is what remains once the gzip header has been stripped.
    @zstream ||= Zlib::Inflate.new(-Zlib::MAX_WBITS)
    @zstream.inflate(compressed)
  rescue Zlib::Error
    raise DecoderError
  end

  # Flushes and closes the inflater.
  #
  # Returns the remaining output when the stream had not finished yet, nil
  # when it had already finished or when no data was ever decompressed.
  #
  # Raises DecoderError on any Zlib::Error (e.g. a truncated stream).
  def finalize
    return nil unless @zstream

    begin
      @zstream.finish unless @zstream.finished?
    ensure
      # Close even when finish raises, so the zlib stream is released
      # right away instead of waiting for garbage collection.
      @zstream.close
    end
  rescue Zlib::Error
    raise DecoderError
  end
end
DECODERS = [Deflate, GZip]
View
BIN  spec/fixtures/gzip-sample.gz
Binary file not shown
View
68 spec/gzip_spec.rb
@@ -0,0 +1,68 @@
+require 'helper'
+
describe EventMachine::HttpDecoders::GZip do

  # A small gzip file whose header carries a file name ("hi") and whose
  # payload decompresses to "hi\n".
  let(:compressed) {
    ["1f8b08089668a6500003686900cbc8e402007a7a6fed03000000"].pack("H*")
  }

  it "should extract the stream of a vanilla gzip" do
    header = EventMachine::HttpDecoders::GZipHeader.new
    stream = header.extract_stream(compressed)

    stream.unpack("H*")[0].should eq("cbc8e402007a7a6fed03000000")
  end

  it "should decompress a vanilla gzip" do
    decompressed = ""

    gz = EventMachine::HttpDecoders::GZip.new do |data|
      decompressed << data
    end

    gz << compressed
    gz.finalize!

    decompressed.should eq("hi\n")
  end

  it "should decompress a vanilla gzip file byte by byte" do
    decompressed = ""

    gz = EventMachine::HttpDecoders::GZip.new do |data|
      decompressed << data
    end

    compressed.each_char do |byte|
      gz << byte
    end

    gz.finalize!

    decompressed.should eq("hi\n")
  end

  it "should decompress a large file" do
    decompressed = ""

    gz = EventMachine::HttpDecoders::GZip.new do |data|
      decompressed << data
    end

    # The fixture is binary: read it in binary mode so no newline
    # translation or text encoding is applied to the bytes.
    fixture = File.dirname(__FILE__) + "/fixtures/gzip-sample.gz"
    gz << File.open(fixture, "rb") { |f| f.read }

    gz.finalize!

    decompressed.size.should eq(32907)
  end

  it "should fail with a DecoderError if not a gzip file" do
    # Second magic byte is 0x8c instead of 0x8b.
    not_a_gzip = ["1f8c08089668a650000"].pack("H*")
    header = EventMachine::HttpDecoders::GZipHeader.new

    lambda {
      header.extract_stream(not_a_gzip)
    }.should raise_exception(EventMachine::HttpDecoders::DecoderError)
  end

end
Please sign in to comment.
Something went wrong with that request. Please try again.