From 2b44c8735c9c4a6510043668044ab635a193375f Mon Sep 17 00:00:00 2001 From: Rocco Caputo Date: Sat, 22 Aug 2009 05:15:52 -0400 Subject: [PATCH] Mainly rewrite POE::Filter::HTTPD's parser. May support get_one() now, so some tests also had to change. I hope this will resolve rt.cpan.org ticket 48802, reported by a very patient zerhash. --- poe/lib/POE/Filter/HTTPD.pm | 302 ++++++++++------------------ poe/t/10_units/05_filters/03_http.t | 54 +++-- 2 files changed, 141 insertions(+), 215 deletions(-) diff --git a/poe/lib/POE/Filter/HTTPD.pm b/poe/lib/POE/Filter/HTTPD.pm index 6fe00a04..4485b774 100644 --- a/poe/lib/POE/Filter/HTTPD.pm +++ b/poe/lib/POE/Filter/HTTPD.pm @@ -1,9 +1,7 @@ # Filter::HTTPD Copyright 1998 Artur Bergman . - # Thanks go to Gisle Aas for his excellent HTTP::Daemon. Some of the # get code was copied out if, unfortunately HTTP::Daemon is not easily # subclassed for POE because of the blocking nature. - # 2001-07-27 RCC: This filter will not support the newer get_one() # interface. It gets single things by default, and it does not # support filter switching. If someone absolutely needs to switch to @@ -15,14 +13,19 @@ use strict; use POE::Filter; use vars qw($VERSION @ISA); -$VERSION = '1.007'; # NOTE - Should be #.### (three decimal places) +$VERSION = '1.007'; +# NOTE - Should be #.### (three decimal places) @ISA = qw(POE::Filter); -sub BUFFER () { 0 } -sub TYPE () { 1 } -sub FINISH () { 2 } -sub HEADER () { 3 } -sub CLIENT_PROTO () { 4 } +sub BUFFER () { 0 } # raw data buffer to build requests +sub STATE () { 1 } # built a full request +sub REQUEST () { 2 } # partial request being built +sub CLIENT_PROTO () { 3 } # client protoco version requested +sub CONTENT_LEN () { 4 } # expected content length +sub CONTENT_ADDED () { 5 } # amount of content added to request + +sub ST_HEADERS () { 0x01 } # waiting for complete header block +sub ST_CONTENT () { 0x02 } # waiting for complete body use Carp qw(croak); use HTTP::Status qw( status_message RC_BAD_REQUEST RC_OK RC_LENGTH_REQUIRED ); @@ -38,156 +41,69 @@ my $HTTP_1_1 = _http_version("HTTP/1.1"); sub new { my $type = shift; - my $self = [ - '', # BUFFER - 0, # TYPE - 0, # FINISH - undef, # HEADER - undef, # CLIENT_PROTO - ]; - bless $self, $type; - $self; + return bless( + [ + '', # BUFFER + ST_HEADERS, # STATE + undef, # REQUEST + undef, # CLIENT_PROTO + 0, # CONTENT_LEN + 0, # CONTENT_ADDED + ], + $type + ); } #------------------------------------------------------------------------------ sub get_one_start { - my ($self, $stream) = @_; - return if ( $self->[FINISH] ); - $stream = [ $stream ] unless ( ref( $stream ) ); - $self->[BUFFER] .= join( '', @$stream ); + my ($self, $stream) = @_; + $self->[BUFFER] .= join( '', @$stream ); } sub get_one { - my ($self) = @_; - return ( $self->[FINISH] ) ? [] : $self->get( [] ); -} - -sub get { - my ($self, $stream) = @_; + my ($self) = @_; # Need to check lengths in octets, not characters. BEGIN { eval { require bytes } and bytes->import; } - # Why? - local($_); - - # Sanity check. "finish" is set when a request has completely - # arrived. Subsequent get() calls on the same request should not - # happen. - # TODO Maybe this should return [] instead of dying? - - if ($self->[FINISH]) { - - # This works around a request length vs. actual content length - # error. Looks like some browsers (mozilla!) sometimes add on an - # extra newline? - - # return [] unless @$stream and grep /\S/, @$stream; - - my @dump; - my $offset = 0; - $stream = $self->[BUFFER].join("", @$stream); - while (length $stream) { - my $line = substr($stream, 0, 16); - substr($stream, 0, 16) = ''; - - my $hexdump = unpack 'H*', $line; - $hexdump =~ s/(..)/$1 /g; - - $line =~ tr[ -~][.]c; - push @dump, sprintf( "%04x %-47.47s - %s\n", $offset, $hexdump, $line ); - $offset += 16; - } - - return [ - $self->_build_error( - RC_BAD_REQUEST, - "Did not want any more data. Got this:" . - "

" . join("", @dump) . "

" - ) - ]; - } - - # Accumulate data in a framing buffer. - - $self->[BUFFER] .= join('', @$stream); + # Waiting for a complete suite of headers. + if ($self->[STATE] & ST_HEADERS) { + # Strip leading whitespace. + $self->[BUFFER] =~ s/^\s+//; - # If headers were already received, then the framing buffer is - # purely content. Return nothing until content-length bytes are in - # the buffer, then return the entire request. + # No blank line yet. Side effect: Raw headers block is extracted + # from the input buffer. + return [] unless ( + $self->[BUFFER] =~ + s/^(\S.*?(?:\x0D\x0A?\x0D\x0A?|\x0A\x0D?\x0A\x0D?))//s + ); - if ($self->[HEADER]) { - my $buf = $self->[BUFFER]; - my $r = $self->[HEADER]; - my $cl = $r->content_length() || length($buf) || 0; + # Raw headers block from the input buffer. + my $rh = $1; - # Some browsers (like MSIE 5.01) send extra CRLFs after the - # content. Shame on them. Now we need a special case to drop - # their extra crap. - # - # We use the first $cl octets of the buffer as the request - # content. It's then stripped away. Leading whitespace in - # whatever is left is also stripped away. Any nonspace data left - # over will throw an error. - # - # Four-argument substr() would be ideal here, but it's a - # relatively recent development. - # - # PG- CGI.pm only reads Content-Length: bytes from STDIN. - if (length($buf) >= $cl) { - $r->content(substr($buf, 0, $cl)); - $self->[BUFFER] = substr($buf, $cl); - $self->[BUFFER] =~ s/^\s+//; - - # We are sending this back, so won't need it anymore. - $self->[HEADER] = undef; - $self->[FINISH]++; - return [$r]; + # Parse the request line. + if ($rh !~ s/^(\w+)[ \t]+(\S+)(?:[ \t]+(HTTP\/\d+\.\d+))?[^\012]*\012//) { + return [ + $self->_build_error(RC_BAD_REQUEST, "Request line parse failure. ($rh)") + ]; } - #print "$cl wanted, got " . length($buf) . "\n"; - return []; - } + # Create an HTTP::Request object from values in the request line. + my ($method, $request_path, $proto) = ($1, $2, ($3 || "HTTP/0.9")); - # Headers aren't already received. Short-circuit header parsing: - # don't return anything until we've received a blank line. + # Fix a double starting slash on the path. It happens. + $request_path =~ s!^//+!/!; - return [] unless( - $self->[BUFFER] =~ /(\x0D\x0A?\x0D\x0A?|\x0A\x0D?\x0A\x0D?)/s - ); + my $r = HTTP::Request->new($method, URI->new($request_path)); + $r->protocol($proto); + $self->[CLIENT_PROTO] = $proto = _http_version($proto); - # Copy the buffer for header parsing, and remove the header block - # from the content buffer. + # Parse headers. - my $buf = $self->[BUFFER]; - $self->[BUFFER] =~ s/.*?(\x0D\x0A?\x0D\x0A?|\x0A\x0D?\x0A\x0D?)//s; - - # Parse the request line. - if ($buf !~ s/^(\w+)[ \t]+(\S+)(?:[ \t]+(HTTP\/\d+\.\d+))?[^\012]*\012//) { - return [ - $self->_build_error(RC_BAD_REQUEST, "Request line parse failure.") - ]; - } - my $proto = $3 || "HTTP/0.9"; - - # Use the request line to create a request object. - - my $method = $1; - my $req_path = $2; - $req_path =~ s/^[\/]{2,}/\//; # fix double slash starting path - - my $r = HTTP::Request->new($method, URI->new($req_path)); - $r->protocol($proto); - $self->[CLIENT_PROTO] = $proto = _http_version($proto); - - # Add the raw request's headers to the request object we'll be - # returning. - - if ($proto >= $HTTP_1_0) { - my ($key,$val); - HEADER: while ($buf =~ s/^([^\012]*)\012//) { - $_ = $1; + my ($key, $val); + HEADER: while ($rh =~ s/^([^\012]*)\012//) { + local $_ = $1; s/\015$//; if (/^([\w\-~]+)\s*:\s*(.*)/) { $r->push_header($key, $val) if $key; @@ -200,75 +116,70 @@ sub get { last HEADER; } } - $r->push_header($key,$val) if($key); - } - $self->[HEADER] = $r; + $r->push_header($key, $val) if $key; - # If this is a GET or HEAD request, we won't be expecting a message - # body. Finish up. - $method = uc $r->method(); - if ($method eq 'GET' or $method eq 'HEAD') { - $self->[FINISH]++; - # We are sending this back, so won't need it anymore. - $self->[HEADER] = undef; - return [$r]; - } + # We got a full set of headers. Fall through to content if we + # have a content length. - # However, if it's any other type of request, check whether the - # entire content has already been received! If so, add that to the - # request and we're done. Otherwise we'll expect a subsequent get() - # call to finish things up. + my $cl = $r->content_length() || 0; + $cl =~ s/\D.*$//; + $cl ||= 0; - #print "post:$buf:\END BUFFER\n"; - #print length($buf)."-".$r->content_length()."\n"; + unless ($cl) { + if ($self->[CLIENT_PROTO] >= $HTTP_1_1) { + $r = $self->_build_error(RC_LENGTH_REQUIRED, "No content length found.") + } - my $cl = $r->content_length(); - unless(defined $cl) { - if($self->[CLIENT_PROTO] == 9) { - return [ - $self->_build_error( - RC_BAD_REQUEST, - "POST request detected in an HTTP 0.9 transaction. " . - "POST is not a valid HTTP 0.9 transaction type. " . - "Please verify your HTTP version and transaction content." - ) - ]; - } - elsif ($method eq 'OPTIONS') { - $self->[FINISH]++; - # OPTIONS requests can have an optional content length - # See http://www.faqs.org/rfcs/rfc2616.html, section 9.2 - $self->[HEADER] = undef; - return [$r]; - } - else { - return [ - $self->_build_error(RC_LENGTH_REQUIRED, "No content length found.") - ]; + $self->[STATE] = ST_HEADERS; + @$self[REQUEST, CLIENT_PROTO] = (undef, undef); + @$self[CONTENT_LEN, CONTENT_ADDED] = (0, 0); + + return [ $r ]; } - } - unless ($cl =~ /^\d+$/) { - return [ - $self->_build_error( - RC_BAD_REQUEST, - "Content length contains non-digits." - ) - ]; + $self->[REQUEST] = $r; + $self->[CONTENT_LEN] = $cl; + $self->[STATE] = ST_CONTENT; + # Fall through to content. } - if (length($buf) >= $cl) { - $r->content(substr($buf, 0, $cl)); - $self->[BUFFER] = substr($buf, $cl); + # Waiting for content. + if ($self->[STATE] & ST_CONTENT) { + my $r = $self->[REQUEST]; + my $cl_needed = $self->[CONTENT_LEN] - $self->[CONTENT_ADDED]; + die "already got enough content ($cl_needed needed)" if $cl_needed < 1; + + # Not enough content to complete the request. Add it to the + # request content, and return an incomplete status. + if (length($self->[BUFFER]) < $cl_needed) { + $r->add_content($self->[BUFFER]); + $self->[CONTENT_ADDED] += length($self->[BUFFER]); + $self->[BUFFER] = ""; + return []; + } + + # Enough data. Add it to the request content. + # PG- CGI.pm only reads Content-Length: bytes from STDIN. + + # Four-argument substr() would be ideal here, but it's not + # entirely backward compatible. + $r->add_content(substr($self->[BUFFER], 0, $cl_needed)); + substr($self->[BUFFER], 0, $cl_needed) = ""; + + # Some browsers (like MSIE 5.01) send extra CRLFs after the + # content. Shame on them. $self->[BUFFER] =~ s/^\s+//; - $self->[FINISH]++; - # We are sending this back, so won't need it anymore. - $self->[HEADER] = undef; - return [$r]; + + # Prepare for the next request, and return this one. + $self->[STATE] = ST_HEADERS; + @$self[REQUEST, CLIENT_PROTO] = (undef, undef); + @$self[CONTENT_LEN, CONTENT_ADDED] = (0, 0); + return [ $r ]; } - return []; + # What are we waiting for? + die "unknown state $self->[STATE]"; } #------------------------------------------------------------------------------ @@ -303,9 +214,6 @@ sub put { push @raw, join("\x0D\x0A", @headers, "") . $_->content; } - # Allow next request after we're done sending the response. - $self->[FINISH]--; - \@raw; } diff --git a/poe/t/10_units/05_filters/03_http.t b/poe/t/10_units/05_filters/03_http.t index a2b7a335..34f13725 100644 --- a/poe/t/10_units/05_filters/03_http.t +++ b/poe/t/10_units/05_filters/03_http.t @@ -23,7 +23,7 @@ BEGIN { } BEGIN { - plan tests => 91; + plan tests => 98; } use_ok('POE::Filter::HTTPD'); @@ -196,22 +196,33 @@ SKIP: { # simple put {{{ } # }}} { # multipart form data post {{{ - my $request = POST 'http://localhost/foo.mhtml', Content_Type => 'form-data', - content => [ 'I' => 'like', 'tasty' => 'pie', - file => [ $0 ] - ]; + my $request = POST( + 'http://localhost/foo.mhtml', + Content_Type => 'form-data', + content => [ + 'I' => 'like', 'tasty' => 'pie', file => [ $0 ] + ] + ); $request->protocol('HTTP/1.0'); my $filter = POE::Filter::HTTPD->new(); my $data = $filter->get([ $request->as_string ]); - is(ref $data, 'ARRAY', 'multipart form data: get() returns list of requests'); - is(scalar @$data, 1, 'multipart form data: get() returned single request'); + is( + ref($data), 'ARRAY', + 'multipart form data: get() returns list of requests' + ); + is( + scalar(@$data), 1, + 'multipart form data: get() returned single request' + ); my ($req) = @$data; - isa_ok($req, 'HTTP::Request', - 'multipart form data: get() returns HTTP::Request object'); + isa_ok( + $req, 'HTTP::Request', + 'multipart form data: get() returns HTTP::Request object' + ); check_fields($req, { method => 'POST', @@ -358,19 +369,23 @@ END # request + trailing whitespace in separate get == just request { my $filter = POE::Filter::HTTPD->new; - $filter->get([ $req->as_string ]); # assume this one is fine - my $data = $filter->get([ "\r\n \r\n\n" ]); + my $data = $filter->get([ $req->as_string, "\r\n \r\n\n" ]); is(ref($data), 'ARRAY', 'trailing: extra whitespace get: ref'); - is(scalar(@$data), 1, 'trailing: extra whitespace get: no req'); + is(scalar(@$data), 1, 'trailing: extra whitespace get: only one response'); + $data = $filter->get([ "\r\n \r\n\n" ]); + is(ref($data), 'ARRAY', 'trailing: whitespace by itself: ref'); + is(scalar(@$data), 0, 'trailing: whitespace by itself: no req'); } # request + garbage in separate get == error { my $filter = POE::Filter::HTTPD->new; - $filter->get([ $req->as_string ]); # assume this one is fine - my $data = $filter->get([ $req->as_string, "GARBAGE!" ]); - check_error_response($data, RC_BAD_REQUEST, - 'trailing: error with trailing garbage'); + my $data = $filter->get([ $req->as_string, "GARBAGE!\r\n\r\n" ]); + + is(ref($data), 'ARRAY', 'trailing: whitespace by itself: ref'); + is(scalar(@$data), 2, 'trailing: whitespace by itself: no req'); + isa_ok($data->[0], 'HTTP::Request'); + isa_ok($data->[1], 'HTTP::Response'); } } # }}} @@ -417,8 +432,11 @@ TODO: { # wishlist for supporting get_pending! {{{ my $filter = POE::Filter::HTTPD->new; my $req = HTTP::Request->new('ELEPHANT', '/'); my $data = $filter->get([ $req->as_string ]); - check_error_response($data, RC_BAD_REQUEST, - 'unsupported method: bad request'); + check_fields($$data[0], { + protocol => 'HTTP/0.9', + method => 'ELEPHANT', + uri => '/', + }, 'strange method'); } { # bad request -- 1.1 so length required my $filter = POE::Filter::HTTPD->new;