Skip to content

Commit

Permalink
Mainly rewrite POE::Filter::HTTPD's parser. May support get_one()
Browse files Browse the repository at this point in the history
now, so some tests also had to change.  I hope this will resolve
rt.cpan.org ticket 48802, reported by a very patient zerhash.
  • Loading branch information
rcaputo committed Aug 22, 2009
1 parent 1fe5697 commit 2b44c87
Show file tree
Hide file tree
Showing 2 changed files with 141 additions and 215 deletions.
302 changes: 105 additions & 197 deletions poe/lib/POE/Filter/HTTPD.pm
@@ -1,9 +1,7 @@
# Filter::HTTPD Copyright 1998 Artur Bergman <artur@vogon.se>. # Filter::HTTPD Copyright 1998 Artur Bergman <artur@vogon.se>.

# Thanks go to Gisle Aas for his excellent HTTP::Daemon. Some of the # Thanks go to Gisle Aas for his excellent HTTP::Daemon. Some of the
# get code was copied out if, unfortunately HTTP::Daemon is not easily # get code was copied out if, unfortunately HTTP::Daemon is not easily
# subclassed for POE because of the blocking nature. # subclassed for POE because of the blocking nature.

# 2001-07-27 RCC: This filter will not support the newer get_one() # 2001-07-27 RCC: This filter will not support the newer get_one()
# interface. It gets single things by default, and it does not # interface. It gets single things by default, and it does not
# support filter switching. If someone absolutely needs to switch to # support filter switching. If someone absolutely needs to switch to
Expand All @@ -15,14 +13,19 @@ use strict;
use POE::Filter; use POE::Filter;


use vars qw($VERSION @ISA); use vars qw($VERSION @ISA);
$VERSION = '1.007'; # NOTE - Should be #.### (three decimal places) $VERSION = '1.007';
# NOTE - Should be #.### (three decimal places)
@ISA = qw(POE::Filter); @ISA = qw(POE::Filter);


sub BUFFER () { 0 } sub BUFFER () { 0 } # raw data buffer to build requests
sub TYPE () { 1 } sub STATE () { 1 } # built a full request
sub FINISH () { 2 } sub REQUEST () { 2 } # partial request being built
sub HEADER () { 3 } sub CLIENT_PROTO () { 3 } # client protoco version requested
sub CLIENT_PROTO () { 4 } sub CONTENT_LEN () { 4 } # expected content length
sub CONTENT_ADDED () { 5 } # amount of content added to request

sub ST_HEADERS () { 0x01 } # waiting for complete header block
sub ST_CONTENT () { 0x02 } # waiting for complete body


use Carp qw(croak); use Carp qw(croak);
use HTTP::Status qw( status_message RC_BAD_REQUEST RC_OK RC_LENGTH_REQUIRED ); use HTTP::Status qw( status_message RC_BAD_REQUEST RC_OK RC_LENGTH_REQUIRED );
Expand All @@ -38,156 +41,69 @@ my $HTTP_1_1 = _http_version("HTTP/1.1");


sub new { sub new {
my $type = shift; my $type = shift;
my $self = [ return bless(
'', # BUFFER [
0, # TYPE '', # BUFFER
0, # FINISH ST_HEADERS, # STATE
undef, # HEADER undef, # REQUEST
undef, # CLIENT_PROTO undef, # CLIENT_PROTO
]; 0, # CONTENT_LEN
bless $self, $type; 0, # CONTENT_ADDED
$self; ],
$type
);
} }


#------------------------------------------------------------------------------ #------------------------------------------------------------------------------


sub get_one_start { sub get_one_start {
my ($self, $stream) = @_; my ($self, $stream) = @_;
return if ( $self->[FINISH] ); $self->[BUFFER] .= join( '', @$stream );
$stream = [ $stream ] unless ( ref( $stream ) );
$self->[BUFFER] .= join( '', @$stream );
} }


sub get_one { sub get_one {
my ($self) = @_; my ($self) = @_;
return ( $self->[FINISH] ) ? [] : $self->get( [] );
}

sub get {
my ($self, $stream) = @_;


# Need to check lengths in octets, not characters. # Need to check lengths in octets, not characters.
BEGIN { eval { require bytes } and bytes->import; } BEGIN { eval { require bytes } and bytes->import; }


# Why? # Waiting for a complete suite of headers.
local($_); if ($self->[STATE] & ST_HEADERS) {

# Strip leading whitespace.
# Sanity check. "finish" is set when a request has completely $self->[BUFFER] =~ s/^\s+//;
# arrived. Subsequent get() calls on the same request should not
# happen.
# TODO Maybe this should return [] instead of dying?

if ($self->[FINISH]) {

# This works around a request length vs. actual content length
# error. Looks like some browsers (mozilla!) sometimes add on an
# extra newline?

# return [] unless @$stream and grep /\S/, @$stream;

my @dump;
my $offset = 0;
$stream = $self->[BUFFER].join("", @$stream);
while (length $stream) {
my $line = substr($stream, 0, 16);
substr($stream, 0, 16) = '';

my $hexdump = unpack 'H*', $line;
$hexdump =~ s/(..)/$1 /g;

$line =~ tr[ -~][.]c;
push @dump, sprintf( "%04x %-47.47s - %s\n", $offset, $hexdump, $line );
$offset += 16;
}

return [
$self->_build_error(
RC_BAD_REQUEST,
"Did not want any more data. Got this:" .
"<p><pre>" . join("", @dump) . "</pre></p>"
)
];
}

# Accumulate data in a framing buffer.

$self->[BUFFER] .= join('', @$stream);


# If headers were already received, then the framing buffer is # No blank line yet. Side effect: Raw headers block is extracted
# purely content. Return nothing until content-length bytes are in # from the input buffer.
# the buffer, then return the entire request. return [] unless (
$self->[BUFFER] =~
s/^(\S.*?(?:\x0D\x0A?\x0D\x0A?|\x0A\x0D?\x0A\x0D?))//s
);


if ($self->[HEADER]) { # Raw headers block from the input buffer.
my $buf = $self->[BUFFER]; my $rh = $1;
my $r = $self->[HEADER];
my $cl = $r->content_length() || length($buf) || 0;


# Some browsers (like MSIE 5.01) send extra CRLFs after the # Parse the request line.
# content. Shame on them. Now we need a special case to drop if ($rh !~ s/^(\w+)[ \t]+(\S+)(?:[ \t]+(HTTP\/\d+\.\d+))?[^\012]*\012//) {
# their extra crap. return [
# $self->_build_error(RC_BAD_REQUEST, "Request line parse failure. ($rh)")
# We use the first $cl octets of the buffer as the request ];
# content. It's then stripped away. Leading whitespace in
# whatever is left is also stripped away. Any nonspace data left
# over will throw an error.
#
# Four-argument substr() would be ideal here, but it's a
# relatively recent development.
#
# PG- CGI.pm only reads Content-Length: bytes from STDIN.
if (length($buf) >= $cl) {
$r->content(substr($buf, 0, $cl));
$self->[BUFFER] = substr($buf, $cl);
$self->[BUFFER] =~ s/^\s+//;

# We are sending this back, so won't need it anymore.
$self->[HEADER] = undef;
$self->[FINISH]++;
return [$r];
} }


#print "$cl wanted, got " . length($buf) . "\n"; # Create an HTTP::Request object from values in the request line.
return []; my ($method, $request_path, $proto) = ($1, $2, ($3 || "HTTP/0.9"));
}


# Headers aren't already received. Short-circuit header parsing: # Fix a double starting slash on the path. It happens.
# don't return anything until we've received a blank line. $request_path =~ s!^//+!/!;


return [] unless( my $r = HTTP::Request->new($method, URI->new($request_path));
$self->[BUFFER] =~ /(\x0D\x0A?\x0D\x0A?|\x0A\x0D?\x0A\x0D?)/s $r->protocol($proto);
); $self->[CLIENT_PROTO] = $proto = _http_version($proto);


# Copy the buffer for header parsing, and remove the header block # Parse headers.
# from the content buffer.


my $buf = $self->[BUFFER]; my ($key, $val);
$self->[BUFFER] =~ s/.*?(\x0D\x0A?\x0D\x0A?|\x0A\x0D?\x0A\x0D?)//s; HEADER: while ($rh =~ s/^([^\012]*)\012//) {

local $_ = $1;
# Parse the request line.
if ($buf !~ s/^(\w+)[ \t]+(\S+)(?:[ \t]+(HTTP\/\d+\.\d+))?[^\012]*\012//) {
return [
$self->_build_error(RC_BAD_REQUEST, "Request line parse failure.")
];
}
my $proto = $3 || "HTTP/0.9";

# Use the request line to create a request object.

my $method = $1;
my $req_path = $2;
$req_path =~ s/^[\/]{2,}/\//; # fix double slash starting path

my $r = HTTP::Request->new($method, URI->new($req_path));
$r->protocol($proto);
$self->[CLIENT_PROTO] = $proto = _http_version($proto);

# Add the raw request's headers to the request object we'll be
# returning.

if ($proto >= $HTTP_1_0) {
my ($key,$val);
HEADER: while ($buf =~ s/^([^\012]*)\012//) {
$_ = $1;
s/\015$//; s/\015$//;
if (/^([\w\-~]+)\s*:\s*(.*)/) { if (/^([\w\-~]+)\s*:\s*(.*)/) {
$r->push_header($key, $val) if $key; $r->push_header($key, $val) if $key;
Expand All @@ -200,75 +116,70 @@ sub get {
last HEADER; last HEADER;
} }
} }
$r->push_header($key,$val) if($key);
}


$self->[HEADER] = $r; $r->push_header($key, $val) if $key;


# If this is a GET or HEAD request, we won't be expecting a message # We got a full set of headers. Fall through to content if we
# body. Finish up. # have a content length.
$method = uc $r->method();
if ($method eq 'GET' or $method eq 'HEAD') {
$self->[FINISH]++;
# We are sending this back, so won't need it anymore.
$self->[HEADER] = undef;
return [$r];
}


# However, if it's any other type of request, check whether the my $cl = $r->content_length() || 0;
# entire content has already been received! If so, add that to the $cl =~ s/\D.*$//;
# request and we're done. Otherwise we'll expect a subsequent get() $cl ||= 0;
# call to finish things up.


#print "post:$buf:\END BUFFER\n"; unless ($cl) {
#print length($buf)."-".$r->content_length()."\n"; if ($self->[CLIENT_PROTO] >= $HTTP_1_1) {
$r = $self->_build_error(RC_LENGTH_REQUIRED, "No content length found.")
}


my $cl = $r->content_length(); $self->[STATE] = ST_HEADERS;
unless(defined $cl) { @$self[REQUEST, CLIENT_PROTO] = (undef, undef);
if($self->[CLIENT_PROTO] == 9) { @$self[CONTENT_LEN, CONTENT_ADDED] = (0, 0);
return [
$self->_build_error( return [ $r ];
RC_BAD_REQUEST,
"POST request detected in an HTTP 0.9 transaction. " .
"POST is not a valid HTTP 0.9 transaction type. " .
"Please verify your HTTP version and transaction content."
)
];
}
elsif ($method eq 'OPTIONS') {
$self->[FINISH]++;
# OPTIONS requests can have an optional content length
# See http://www.faqs.org/rfcs/rfc2616.html, section 9.2
$self->[HEADER] = undef;
return [$r];
}
else {
return [
$self->_build_error(RC_LENGTH_REQUIRED, "No content length found.")
];
} }
}


unless ($cl =~ /^\d+$/) { $self->[REQUEST] = $r;
return [ $self->[CONTENT_LEN] = $cl;
$self->_build_error( $self->[STATE] = ST_CONTENT;
RC_BAD_REQUEST, # Fall through to content.
"Content length contains non-digits."
)
];
} }


if (length($buf) >= $cl) { # Waiting for content.
$r->content(substr($buf, 0, $cl)); if ($self->[STATE] & ST_CONTENT) {
$self->[BUFFER] = substr($buf, $cl); my $r = $self->[REQUEST];
my $cl_needed = $self->[CONTENT_LEN] - $self->[CONTENT_ADDED];
die "already got enough content ($cl_needed needed)" if $cl_needed < 1;

# Not enough content to complete the request. Add it to the
# request content, and return an incomplete status.
if (length($self->[BUFFER]) < $cl_needed) {
$r->add_content($self->[BUFFER]);
$self->[CONTENT_ADDED] += length($self->[BUFFER]);
$self->[BUFFER] = "";
return [];
}

# Enough data. Add it to the request content.
# PG- CGI.pm only reads Content-Length: bytes from STDIN.

# Four-argument substr() would be ideal here, but it's not
# entirely backward compatible.
$r->add_content(substr($self->[BUFFER], 0, $cl_needed));
substr($self->[BUFFER], 0, $cl_needed) = "";

# Some browsers (like MSIE 5.01) send extra CRLFs after the
# content. Shame on them.
$self->[BUFFER] =~ s/^\s+//; $self->[BUFFER] =~ s/^\s+//;
$self->[FINISH]++;
# We are sending this back, so won't need it anymore. # Prepare for the next request, and return this one.
$self->[HEADER] = undef; $self->[STATE] = ST_HEADERS;
return [$r]; @$self[REQUEST, CLIENT_PROTO] = (undef, undef);
@$self[CONTENT_LEN, CONTENT_ADDED] = (0, 0);
return [ $r ];
} }


return []; # What are we waiting for?
die "unknown state $self->[STATE]";
} }


#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
Expand Down Expand Up @@ -303,9 +214,6 @@ sub put {
push @raw, join("\x0D\x0A", @headers, "") . $_->content; push @raw, join("\x0D\x0A", @headers, "") . $_->content;
} }


# Allow next request after we're done sending the response.
$self->[FINISH]--;

\@raw; \@raw;
} }


Expand Down

0 comments on commit 2b44c87

Please sign in to comment.