Skip to content
Newer
Older
100644 378 lines (276 sloc) 9.36 KB
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
1 =head1 NAME
2
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
3 HTTP::Parser - parse HTTP/1.1 request into HTTP::Request/Response object
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
4
5 =head1 SYNOPSIS
6
7 my $parser = HTTP::Parser->new();
8
9 ...
10
11 my $status = $parser->add($text);
12
13 if(0 == $status) {
14 print "request: ".$parser->request()->as_string(); # HTTP::Request
15 } elsif(-2 == $status) {
16 print "need a line of data\n";
17 } elsif(-1 == $status) {
18 print "need more data\n";
19 } else { # $status > 0
20 print "need $status byte(s)\n";
21 }
22
23 =head1 DESCRIPTION
24
25 This is an HTTP request parser. It takes chunks of text as received and
26 returns a 'hint' as to what is required, or returns the HTTP::Request when
27 a complete request has been read. HTTP/1.1 chunking is supported. It dies
28 if it finds an error.
29
30 =cut
31 use 5.006_001;
32 use strict;
33
34 package HTTP::Parser;
35
5ec5ea1 @rjray This was release 0.04 of HTTP::Parser
authored
36 our $VERSION = '0.04';
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
37
38 use HTTP::Request;
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
39 use HTTP::Response;
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
40 use URI;
41
99b9d85 @rjray This was release 0.02 of HTTP::Parser
authored
42 # token is (RFC 2616, ASCII)
43 my $Token =
44 qr/[\x21\x23-\x27\x2a\x2b\x2d\x2e\x30-\x39\x41-\x5a\x5e-\x7a\x7c\x7e]+/;
45
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
46
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
47 =head2 new ( named params... )
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
48
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
49 Create a new HTTP::Parser object. Takes named parameters, e.g.:
50
51 my $parser = HTTP::Parser->new(request => 1);
52
53 =over 4
54
55 =item request
56
57 Allows or denies parsing an HTTP request and returning an C<HTTP::Request>
58 object.
59
60 =item response
61
62 Allows or denies parsing an HTTP response and returning an C<HTTP::Response>
63 object.
64
65 =back
66
67 If you pass neither C<request> nor C<response>, only requests are parsed (for
68 backwards compatibility); if you pass either, the other defaults to false
69 (disallowing both requests and responses is a fatal error).
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
70
71 =cut
72 sub new {
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
73 my ($class, %p) = @_;
74 $p{request} = 1 unless exists $p{response} or exists $p{request};
75 die 'must allow request or response to be parsed'
76 unless $p{request} or $p{response};
bb49a75 @rjray Applied patch from RT#34021 (https://rt.cpan.org/Ticket/Display.html?…
authored
77 @p{qw(state data strict_compliance)} = ('blank', '', 1);
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
78 my $self = bless \%p, ref $class || $class;
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
79 return $self;
80 }
81
82
83 =head2 add ( string )
84
85 Parse request. Returns:
86
87 =over 8
88
89 =item 0
90
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
91 if finished (call C<object> to get an HTTP::Request or Response object)
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
92
93 =item -1
94
95 if not finished but not sure how many bytes remain
96
97 =item -2
98
99 if waiting for a line (like 0 with a hint)
100
101 =item count
102
103 if waiting for that many bytes
104
105 =back
106
107 Dies on error.
108
109 This method of parsing makes it easier to parse a request from an event-based
110 system, on the other hand, it's quite alright to pass in the whole request.
111 Ideally, the first chunk passed in is the header (up to the double newline),
112 then whatever byte counts are requested.
113
114 When a request object is returned, the X-HTTP-Version header has the HTTP
115 version, the uri() method will always return a URI object, not a string.
116
117 Note that a nonzero return is just a hint, and any amount of data can be
118 passed in to a subsequent add() call.
119
120 =cut
121 sub add {
122 my ($self,$s) = @_;
123 $s = '' if not defined $s;
124
125 $self->{data} .= $s;
126
99b9d85 @rjray This was release 0.02 of HTTP::Parser
authored
127 # pre-header blank lines are allowed (RFC 2616 4.1)
128 if($self->{state} eq 'blank') {
129 $self->{data} =~ s/^(\x0d?\x0a)+//;
130 return -2 unless length $self->{data};
131 $self->{state} = 'header'; # done with blank lines; fall through
132 }
133
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
134 # still waiting for the header
99b9d85 @rjray This was release 0.02 of HTTP::Parser
authored
135 if($self->{state} eq 'header') {
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
136 # double line break indicates end of header; parse it
99b9d85 @rjray This was release 0.02 of HTTP::Parser
authored
137 if($self->{data} =~ /^(.*?)\x0d?\x0a\x0d?\x0a/s) {
138 return $self->_parse_header(length $1);
139 }
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
140 return -2; # still waiting for unknown amount of header lines
141
142 # waiting for main body of request
99b9d85 @rjray This was release 0.02 of HTTP::Parser
authored
143 } elsif($self->{state} eq 'body') {
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
144 return $self->_parse_body();
145
146 # chunked data
99b9d85 @rjray This was release 0.02 of HTTP::Parser
authored
147 } elsif($self->{state} eq 'chunked') {
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
148 return $self->_parse_chunk();
149
150 # trailers
99b9d85 @rjray This was release 0.02 of HTTP::Parser
authored
151 } elsif($self->{state} eq 'trailer') {
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
152 # double line break indicates end of trailer; parse it
153 return $self->_parse_header(length $1,1)
154 if $self->{data} =~ /^(.*?)\x0d?\x0a\x0d?\x0a/s;
155 return -1; # still waiting for unknown amount of trailer data
156 }
157
99b9d85 @rjray This was release 0.02 of HTTP::Parser
authored
158 die "unknown state '$self->{state}'";
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
159 }
160
161
162 =head2 data
163
164 Returns current data not parsed. Mainly useful after a request has been
165 parsed. The data is not removed from the object's buffer, and will be
166 seen before the data next passed to add().
167
168 =cut
169 sub data {
170 shift->{data}
171 }
172
173
174 =head2 extra
175
176 Returns the count of extra bytes (length of data()) after a request.
177
178 =cut
179 sub extra {
180 length shift->{data}
181 }
182
183
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
184 =head2 object
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
185
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
186 Returns the object request. Only useful after the parse has completed.
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
187
188 =cut
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
189 sub object {
190 shift->{obj}
191 }
192
193 # keep this for compatibility with 0.02
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
194 sub request {
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
195 shift->{obj}
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
196 }
197
198
199 # _parse_header ( position of double newline in data [, trailer flag] )
200 #
201 # helper for parse that parses an HTTP header
202 # prerequisite: we have data up to a double newline in $self->{data}
203 # if the trailer flag is set, we're parsing trailers
204 #
205 sub _parse_header {
206 my ($self,$eoh,$trailer) = @_;
207 my $header = substr($self->{data},0,$eoh,'');
208 $self->{data} =~ s/^\x0d?\x0a\x0d?\x0a//;
209
210 # parse into lines
211 my @header = split /\x0d?\x0a/,$header;
212 my $request = shift @header unless $trailer;
213
214 # join folded lines
215 my @out;
216 for(@header) {
217 if(s/^[ \t]+//) {
218 die 'LWS on first header line' unless @out;
219 $out[-1] .= $_;
220 } else {
221 push @out, $_;
222 }
223 }
224
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
225 # parse request or response line
226 my $obj;
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
227 unless($trailer) {
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
228 my ($major, $minor);
229
230 # is it an HTTP response?
5ec5ea1 @rjray This was release 0.04 of HTTP::Parser
authored
231 if ($request =~ /^HTTP\/(\d+)\.(\d+)/i) {
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
232 die 'HTTP responses not allowed' unless $self->{response};
233 ($major,$minor) = ($1,$2);
14e44be @rjray Made a change based on the patch from RT#34019, so that the full HTTP…
authored
234 my (undef, $state, $msg) = split / /, $request, 3;
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
235 $obj = $self->{obj} = HTTP::Response->new($state, $msg);
236
237 # perhaps a request?
238 } else {
239 my ($method,$uri,$http) = split / /,$request;
240 die "'$request' is not the start of a valid HTTP request or response"
241 unless $http and $http =~ /^HTTP\/(\d+)\.(\d+)$/i;
242 ($major,$minor) = ($1,$2);
243 die 'HTTP requests not allowed' unless $self->{request};
244 $obj = $self->{obj} = HTTP::Request->new($method, URI->new($uri));
245 }
246
247 $obj->header(X_HTTP_Version => "$major.$minor"); # pseudo-header
248
249 # we've already seen the initial line and created the object
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
250 } else {
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
251 $obj = $self->{obj};
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
252 }
253
254 # import headers
255 my $token = qr/[^][\x00-\x1f\x7f()<>@,;:\\"\/?={} \t]+/;
256 for $header(@header) {
257 die "bad header name in '$header'" unless $header =~ s/^($token):[\t ]*//;
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
258 $obj->push_header($1 => $header);
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
259 }
260
261 # if we're parsing trailers we don't need to look at content
262 return 0 if $trailer;
263
264 # see what sort of content we have, if any
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
265 if(my $length = $obj->header('content_length')) {
5ec5ea1 @rjray This was release 0.04 of HTTP::Parser
authored
266 s/^\s+//, s/\s+$// for $length;
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
267 die "bad content-length '$length'" unless $length =~ /^(\d+)$/;
268 $self->{state} = 'body';
269 return $self->_parse_body();
270 }
271
272 # check for transfer-encoding, and handle chunking
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
273 if(my @te = $obj->header('transfer_encoding')) {
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
274 if(grep { lc $_ eq 'chunked' } @te) {
275 $self->{state} = 'chunked';
276 return $self->_parse_chunk();
277 }
278 }
279
bb49a75 @rjray Applied patch from RT#34021 (https://rt.cpan.org/Ticket/Display.html?…
authored
280 # perhaps we do have content, but the server didn't include a length
281 # section 14.13 of the spec says "SHOULD" unless there are reasons not
282 # to, some bad servers still don't include one
283 if(length $self->{data} && !defined $obj->header('content_length')) {
284 # FIXME: possibly die here unless the user specifically asks us to be non-compliant
285 # die "no content-length header but we still have content" if $self->{strict_compliance};
286
287 # FIXME: should we set a state here?
288 $self->{obj}->content($self->{data});
289 return 0;
290 }
291
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
292 # else we have no content so return success
293 return 0;
294 }
295
296
297 # _parse_body
298 #
299 # helper for parse, returns request object with content if done, else
300 # count of bytes remaining
301 #
302 sub _parse_body {
303 my $self = shift;
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
304 my $length = $self->{obj}->header('content_length');
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
305 if(length $self->{data} >= $length) {
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
306 $self->{obj}->content(substr($self->{data},0,$length,''));
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
307 return 0;
308 }
309 return $length-length $self->{data};
310 }
311
312
313 # _parse_chunk
314 #
315 # helper for parse, parse chunked transfer-encoded message; returns like parse
316 #
317 sub _parse_chunk {
318 my $self = shift;
319
320 CHUNK:
321
322 # need beginning of chunk with size
323 if(not $self->{chunk}) {
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
324 if($self->{data} =~ s/^([0-9a-fA-F]+)[^\x0d\x0a]*?\x0d?\x0a//) {
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
325
326 # a zero-size chunk marks the end
327 unless($self->{chunk} = hex $1) {
328 $self->{state} = 'trailer';
329
330 # double line break indicates end of trailer; parse it
331 $self->{data} = "\x0d\x0a".$self->{data}; # count previous line break
332 return $self->_parse_header(length $1,1)
333 if $self->{data} =~ /^(.*?)\x0d?\x0a\x0d?\x0a/s;
334 return -1; # still waiting for unknown amount of trailer data
335 }
336
337 } else {
338 die "expected chunked enoding, got '".substr($self->{data},0,40)."...'"
339 if $self->{data} =~ /\x0d?\x0a/;
340 return -2; # waiting for a line with chunk information
341 }
342 }
343
344 # do we have a current chunk size?
345 if($self->{chunk}) {
346
347 # do we have enough data to fill it, plus a CR LF?
348 if(length $self->{data} > $self->{chunk} and
349 substr($self->{data},$self->{chunk},2) =~ /^(\x0d?\x0a)/) {
350 my $crlf = $1;
5ec5ea1 @rjray This was release 0.04 of HTTP::Parser
authored
351 $self->{obj}->add_content(substr($self->{data},0,$self->{chunk}));
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
352 substr($self->{data},0,length $crlf) = '';
353
5ec5ea1 @rjray This was release 0.04 of HTTP::Parser
authored
354 # remove data from the buffer that we've already parsed
355 $self->{data} = substr($self->{data},delete $self->{chunk});
356
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
357 # got chunks?
358 goto CHUNK;
359 }
360
361 return $self->{chunk}-length($self->{data})+2; # extra CR LF
362 }
363 }
364
365
366 =head1 AUTHOR
367
368 David Robins E<lt>dbrobins@davidrobins.netE<gt>
369
370 =head1 SEE ALSO
371
e47e562 @rjray This was release 0.03 of HTTP::Parser
authored
372 L<HTTP::Request>, L<HTTP::Response>.
9afbf64 @rjray This was release 0.01 of HTTP::Parser
authored
373
374 =cut
375
376
377 1;
Something went wrong with that request. Please try again.