Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

new version

Trashing the old Ragel parser (which was based on Mongrel) because it's
proving difficult to get the control I need in end-of-message cases.
Replacing this with a hand-written parser using a couple of tricks borrowed
from NGINX. The new parser will be much more work to write, but should prove
faster and allow for better hacking.
  • Loading branch information...
commit 433202d825fa34de5e42f810bb984ece05a36d20 1 parent 6bfd5bf
@ry ry authored
View
1  .gitignore
@@ -1,4 +1,3 @@
tags
*.o
test
-http_parser.c
View
11 Makefile
@@ -1,5 +1,5 @@
-#OPT=-O0 -g -Wall -Wextra -Werror
-OPT=-O2
+OPT=-O0 -g -Wall -Wextra -Werror
+#OPT=-O2
test: http_parser.o test.c
gcc $(OPT) http_parser.o test.c -o $@
@@ -7,10 +7,7 @@ test: http_parser.o test.c
http_parser.o: http_parser.c http_parser.h Makefile
gcc $(OPT) -c http_parser.c
-http_parser.c: http_parser.rl Makefile
- ragel -s -G2 http_parser.rl -o $@
-
-tags: http_parser.rl http_parser.h test.c
+tags: http_parser.c http_parser.h test.c
ctags $^
clean:
@@ -18,7 +15,7 @@ clean:
package: http_parser.c
@rm -rf /tmp/http_parser && mkdir /tmp/http_parser && \
- cp LICENSE README.md Makefile http_parser.c http_parser.rl \
+ cp LICENSE README.md Makefile http_parser.c \
http_parser.h test.c /tmp/http_parser && \
cd /tmp && \
tar -cf http_parser.tar http_parser/
View
891 http_parser.c
@@ -0,0 +1,891 @@
+#include <http_parser.h>
+#include <stdint.h>
+#include <assert.h>
+
+#ifndef NULL
+# define NULL ((void*)0)
+#endif
+
+#define MAX_FIELD_SIZE (80*1024)
+
+/* Begin tracking field FOR at the current byte: the accumulated size is
+ * reset and the mark is pointed at `p`. Expects `parser` and `p` to be in
+ * scope at the point of use. */
+#define MARK(FOR)                                                    \
+  do {                                                               \
+    parser->FOR##_size = 0;                                          \
+    parser->FOR##_mark = p;                                          \
+  } while (0)
+
+/* Run FOR##_callback for the bytes between the field's mark and `p`. When
+ * it reports failure (non-zero), leave the enclosing function, returning the
+ * offset of the current byte. Expects `parser`, `p` and `data` in scope. */
+#define CALLBACK(FOR)                                                \
+  do {                                                               \
+    if (FOR##_callback(parser, p) != 0) {                            \
+      return (p - data);                                             \
+    }                                                                \
+  } while (0)
+
+
+/* Disabled code: this appears to be the body of an earlier, macro-only
+ * version of CALLBACK (note the FOR## pasting). It has no #define line, so it
+ * cannot simply be re-enabled; the per-field *_callback functions below
+ * perform the same size check and user-callback invocation. */
+#if 0
+do { \
+ if (parser->FOR##_mark) { \
+ parser->FOR##_size += p - parser->FOR##_mark; \
+ if (parser->FOR##_size > MAX_FIELD_SIZE) { \
+ return ERROR; \
+ } \
+ if (parser->on_##FOR) { \
+ if (0 != parser->on_##FOR(parser, \
+ parser->FOR##_mark, \
+ p - parser->FOR##_mark)) \
+ { \
+ return ERROR; \
+ } \
+ } \
+ } \
+} while(0)
+#endif
+
+/* Hand the URI bytes in [uri_mark, p) to the user's on_uri callback.
+ * Adds the span to uri_size first and returns -1 once the running total
+ * exceeds MAX_FIELD_SIZE. Returns 0 when no callback is installed, otherwise
+ * whatever on_uri returns. */
+static inline int uri_callback (http_parser *parser, const char *p)
+{
+  const char *start = parser->uri_mark;
+
+  assert(parser->uri_mark);
+  parser->uri_size += p - start;
+
+  if (parser->uri_size > MAX_FIELD_SIZE) {
+    return -1;
+  }
+
+  return parser->on_uri ? parser->on_uri(parser, start, p - start) : 0;
+}
+
+/* Hand the path bytes in [path_mark, p) to the user's on_path callback.
+ * Adds the span to path_size first and returns -1 once the running total
+ * exceeds MAX_FIELD_SIZE. Returns 0 when no callback is installed, otherwise
+ * whatever on_path returns. */
+static inline int path_callback (http_parser *parser, const char *p)
+{
+  const char *start = parser->path_mark;
+
+  assert(parser->path_mark);
+  parser->path_size += p - start;
+
+  if (parser->path_size > MAX_FIELD_SIZE) {
+    return -1;
+  }
+
+  return parser->on_path ? parser->on_path(parser, start, p - start) : 0;
+}
+
+/* Hand the query-string bytes in [query_string_mark, p) to the user's
+ * on_query_string callback. Adds the span to query_string_size first and
+ * returns -1 once the running total exceeds MAX_FIELD_SIZE. Returns 0 when no
+ * callback is installed, otherwise whatever on_query_string returns. */
+static inline int query_string_callback (http_parser *parser, const char *p)
+{
+  const char *start = parser->query_string_mark;
+
+  assert(parser->query_string_mark);
+  parser->query_string_size += p - start;
+
+  if (parser->query_string_size > MAX_FIELD_SIZE) {
+    return -1;
+  }
+
+  return parser->on_query_string
+       ? parser->on_query_string(parser, start, p - start)
+       : 0;
+}
+
+/* Hand the fragment bytes in [fragment_mark, p) to the user's on_fragment
+ * callback. Adds the span to fragment_size first and returns -1 once the
+ * running total exceeds MAX_FIELD_SIZE. Returns 0 when no callback is
+ * installed, otherwise whatever on_fragment returns. */
+static inline int fragment_callback (http_parser *parser, const char *p)
+{
+  const char *start = parser->fragment_mark;
+
+  assert(parser->fragment_mark);
+  parser->fragment_size += p - start;
+
+  if (parser->fragment_size > MAX_FIELD_SIZE) {
+    return -1;
+  }
+
+  return parser->on_fragment
+       ? parser->on_fragment(parser, start, p - start)
+       : 0;
+}
+
+/* Hand the header-name bytes in [header_field_mark, p) to the user's
+ * on_header_field callback. Adds the span to header_field_size first and
+ * returns -1 once the running total exceeds MAX_FIELD_SIZE. Returns 0 when no
+ * callback is installed, otherwise whatever on_header_field returns. */
+static inline int header_field_callback (http_parser *parser, const char *p)
+{
+  const char *start = parser->header_field_mark;
+
+  assert(parser->header_field_mark);
+  parser->header_field_size += p - start;
+
+  if (parser->header_field_size > MAX_FIELD_SIZE) {
+    return -1;
+  }
+
+  return parser->on_header_field
+       ? parser->on_header_field(parser, start, p - start)
+       : 0;
+}
+
+/* Hand the header-value bytes in [header_value_mark, p) to the user's
+ * on_header_value callback. Adds the span to header_value_size first and
+ * returns -1 once the running total exceeds MAX_FIELD_SIZE. Returns 0 when no
+ * callback is installed, otherwise whatever on_header_value returns. */
+static inline int header_value_callback (http_parser *parser, const char *p)
+{
+  const char *start = parser->header_value_mark;
+
+  assert(parser->header_value_mark);
+  parser->header_value_size += p - start;
+
+  if (parser->header_value_size > MAX_FIELD_SIZE) {
+    return -1;
+  }
+
+  return parser->on_header_value
+       ? parser->on_header_value(parser, start, p - start)
+       : 0;
+}
+
+/* Header names the parser recognises while scanning a header field. They are
+ * spelled in lower case because they are compared, byte by byte, against
+ * characters that have already been mapped through the lowcase[] table. */
+#define CONNECTION "connection"
+#define CONTENT_LENGTH "content-length"
+#define TRANSFER_ENCODING "transfer-encoding"
+
+
+/* Lookup table indexed by byte value (256 entries including the string's
+ * terminating NUL). Letters map to their lower-case form, digits and '-' map
+ * to themselves, and every other byte maps to 0. The literal is split before
+ * "0123456789" so the preceding \0 escapes are not read as longer octal
+ * escapes. */
+static const unsigned char lowcase[] =
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789\0\0\0\0\0\0"
+ "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
+ "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+
+
+/* Bitmap with one bit per byte value: byte ch is looked up as bit (ch & 0x1f)
+ * of word usual[ch >> 5]. A set bit lets the URL states of
+ * http_parser_execute consume the byte without further inspection.
+ *
+ * Clear bits: NUL, LF and CR in the first word; ' ', '#', '%', '+', '.', '/'
+ * and '?' in the second.
+ * NOTE(review): because '%', '+', '.' and '/' are clear, callers that only
+ * special-case ' ', CR, LF, '?' and '#' will reject those bytes - confirm
+ * whether that is intended. */
+static const uint32_t usual[] = {
+ 0xffffdbfe, /* 1111 1111 1111 1111 1101 1011 1111 1110 */
+
+ /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"! */
+ 0x7fff37d6, /* 0111 1111 1111 1111 0011 0111 1101 0110 */
+
+ /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ /* ~}| {zyx wvut srqp onml kjih gfed cba` */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+};
+
+/* States of the request parser. The numeric value is stored in
+ * parser->state between calls, so the order of the enumerators matters. */
+enum state {
+  s_start = 0,
+
+  /* method, one state per letter already seen */
+  s_method_G,
+  s_method_GE,
+
+  s_method_P,
+  s_method_PU,
+  s_method_PO,
+  s_method_POS,
+
+  s_method_H,
+  s_method_HE,
+  s_method_HEA,
+
+  s_method_D,
+  s_method_DE,
+  s_method_DEL,
+  s_method_DELE,
+  s_method_DELET,
+
+  s_spaces_before_uri,
+
+  /* absolute URI prefix */
+  s_schema,
+  s_schema_slash,
+  s_schema_slash_slash,
+  s_host,
+  s_port,
+
+  /* path and what follows it */
+  s_path,
+  s_query_string,
+  s_fragment,
+
+  /* "HTTP/" literal */
+  s_http_start,
+  s_http_H,
+  s_http_HT,
+  s_http_HTT,
+  s_http_HTTP,
+
+  /* version number */
+  s_first_major_digit,
+  s_major_digit,
+  s_first_minor_digit,
+  s_minor_digit,
+
+  s_req_line_almost_done,
+
+  /* header lines */
+  s_header_field_start,
+  s_header_field,
+  s_header_value_start,
+  s_header_value,
+
+  s_header_almost_done,
+
+  s_headers_almost_done,
+  s_headers_done
+};
+
+/* Progress of recognising the headers the parser treats specially. The
+ * numeric value is stored in parser->header_state between calls, so the order
+ * of the enumerators matters. */
+enum header_states {
+  h_general = 0,
+
+  /* shared prefix "con" of "connection" and "content-length" */
+  h_C,
+  h_CO,
+  h_CON,
+
+  /* still comparing the name against one candidate */
+  h_matching_connection,
+  h_matching_content_length,
+  h_matching_transfer_encoding,
+
+  /* name recognised */
+  h_connection,
+  h_content_length,
+  h_transfer_encoding,
+
+  /* first letter of an interesting value seen */
+  h_encoding_C,
+  h_connection_K,
+  h_connection_C
+};
+
+#define ERROR (p - data);
+#define CR '\r'
+#define LF '\n'
+#define LOWER(c) (unsigned char)(c | 0x20)
+
+size_t http_parser_execute (http_parser *parser, const char *data, size_t len)
+{
+ char c, ch;
+ const char *p, *pe;
+
+ enum state state = parser->state;
+ enum header_states header_state = parser->header_state;
+ size_t header_index = parser->header_index;
+
+ if (parser->header_field_mark) parser->header_field_mark = data;
+ if (parser->header_value_mark) parser->header_value_mark = data;
+ if (parser->fragment_mark) parser->fragment_mark = data;
+ if (parser->query_string_mark) parser->query_string_mark = data;
+ if (parser->path_mark) parser->path_mark = data;
+ if (parser->uri_mark) parser->uri_mark = data;
+
+ for (p=data, pe=data+len; p != pe; p++) {
+ ch = *p;
+ switch (state) {
+ case s_start:
+ {
+ switch (ch) {
+ /* GET */
+ case 'G':
+ state = s_method_G;
+ break;
+
+ /* POST, PUT */
+ case 'P':
+ state = s_method_P;
+ break;
+
+ /* HEAD */
+ case 'H':
+ state = s_method_H;
+ break;
+
+ /* DELETE */
+ case 'D':
+ state = s_method_D;
+ break;
+
+ case CR:
+ case LF:
+ break;
+
+ default:
+ return ERROR;
+ }
+ break;
+ }
+
+ /* GET */
+
+ case s_method_G:
+ if (ch != 'E') return ERROR;
+ state = s_method_GE;
+ break;
+
+ case s_method_GE:
+ if (ch != 'T') return ERROR;
+ parser->method = HTTP_GET;
+ state = s_spaces_before_uri;
+ break;
+
+ /* HEAD */
+
+ case s_method_H:
+ if (ch != 'E') return ERROR;
+ state = s_method_HE;
+ break;
+
+ case s_method_HE:
+ if (ch != 'A') return ERROR;
+ state = s_method_HEA;
+ break;
+
+ case s_method_HEA:
+ if (ch != 'D') return ERROR;
+ parser->method = HTTP_HEAD;
+ state = s_spaces_before_uri;
+ break;
+
+ /* POST, PUT */
+
+ case s_method_P:
+ switch (ch) {
+ case 'O':
+ state = s_method_PO;
+ break;
+
+ case 'U':
+ state = s_method_PU;
+ break;
+
+ default:
+ return ERROR;
+ }
+ break;
+
+ /* PUT */
+
+ case s_method_PU:
+ if (ch != 'T') return ERROR;
+ parser->method = HTTP_PUT;
+ state = s_spaces_before_uri;
+ break;
+
+ /* POST */
+
+ case s_method_PO:
+ if (ch != 'S') return ERROR;
+ state = s_method_POS;
+ break;
+
+ case s_method_POS:
+ if (ch != 'T') return ERROR;
+ parser->method = HTTP_POST;
+ state = s_spaces_before_uri;
+ break;
+
+ /* DELETE */
+
+ case s_method_D:
+ if (ch != 'E') return ERROR;
+ state = s_method_DE;
+ break;
+
+ case s_method_DE:
+ if (ch != 'L') return ERROR;
+ state = s_method_DEL;
+ break;
+
+ case s_method_DEL:
+ if (ch != 'E') return ERROR;
+ state = s_method_DELE;
+ break;
+
+ case s_method_DELE:
+ if (ch != 'T') return ERROR;
+ state = s_method_DELET;
+ break;
+
+ case s_method_DELET:
+ if (ch != 'E') return ERROR;
+ parser->method = HTTP_DELETE;
+ state = s_spaces_before_uri;
+ break;
+
+
+ case s_spaces_before_uri:
+ {
+ if (ch == ' ') break;
+
+ if (ch == '/') {
+ MARK(uri);
+ MARK(path);
+ state = s_path;
+ break;
+ }
+
+ c = LOWER(ch);
+
+ if (c >= 'a' && c <= 'z') {
+ MARK(uri);
+ state = s_schema;
+ break;
+ }
+
+ return ERROR;
+ }
+
+ case s_schema:
+ {
+ c = LOWER(ch);
+
+ if (c >= 'a' && c <= 'z') break;
+
+ if (ch == ':') {
+ state = s_schema_slash;
+ break;
+ }
+
+ return ERROR;
+ }
+
+ case s_schema_slash:
+ if (ch != '/') return ERROR;
+ state = s_schema_slash_slash;
+ break;
+
+ case s_schema_slash_slash:
+ if (ch != '/') return ERROR;
+ state = s_host;
+ break;
+
+ case s_host:
+ {
+ c = LOWER(ch);
+ if (c >= 'a' && c <= 'z') break;
+ if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
+ switch (ch) {
+ case ':':
+ state = s_port;
+ break;
+ case '/':
+ MARK(path);
+ state = s_path;
+ break;
+ case ' ':
+ /* The request line looks like:
+ * "GET http://foo.bar.com HTTP/1.1"
+ * That is, there is no path.
+ */
+ CALLBACK(uri);
+ state = s_http_start;
+ break;
+ default:
+ return ERROR;
+ }
+ break;
+ }
+
+ case s_port:
+ {
+ if (ch >= '0' && ch <= '9') break;
+ switch (ch) {
+ case '/':
+ MARK(path);
+ state = s_path;
+ break;
+ case ' ':
+ /* The request line looks like:
+ * "GET http://foo.bar.com:1234 HTTP/1.1"
+ * That is, there is no path.
+ */
+ CALLBACK(uri);
+ state = s_http_start;
+ break;
+ default:
+ return ERROR;
+ }
+ break;
+ }
+
+ case s_path:
+ {
+ if (usual[ch >> 5] & (1 << (ch & 0x1f))) break;
+
+ switch (ch) {
+ case ' ':
+ CALLBACK(uri);
+ CALLBACK(path);
+ state = s_http_start;
+ break;
+ case CR:
+ CALLBACK(uri);
+ CALLBACK(path);
+ parser->http_minor = 9;
+ state = s_req_line_almost_done;
+ break;
+ case LF:
+ CALLBACK(uri);
+ CALLBACK(path);
+ parser->http_minor = 9;
+ state = s_header_field_start;
+ break;
+ case '?':
+ CALLBACK(path);
+ MARK(query_string);
+ state = s_query_string;
+ break;
+ case '#':
+ CALLBACK(path);
+ MARK(fragment);
+ state = s_fragment;
+ break;
+ default:
+ return ERROR;
+ }
+ break;
+ }
+
+ case s_query_string:
+ {
+ if (usual[ch >> 5] & (1 << (ch & 0x1f))) break;
+
+ switch (ch) {
+ case ' ':
+ CALLBACK(uri);
+ CALLBACK(query_string);
+ state = s_http_start;
+ break;
+ case CR:
+ CALLBACK(uri);
+ CALLBACK(query_string);
+ parser->http_minor = 9;
+ state = s_req_line_almost_done;
+ break;
+ case LF:
+ CALLBACK(uri);
+ CALLBACK(query_string);
+ parser->http_minor = 9;
+ state = s_header_field_start;
+ break;
+ case '#':
+ CALLBACK(query_string);
+ MARK(fragment);
+ state = s_fragment;
+ break;
+ default:
+ return ERROR;
+ }
+ break;
+ }
+
+ case s_fragment:
+ {
+ if (usual[ch >> 5] & (1 << (ch & 0x1f))) break;
+
+ switch (ch) {
+ case ' ':
+ CALLBACK(uri);
+ CALLBACK(fragment);
+ state = s_http_start;
+ break;
+ case CR:
+ CALLBACK(uri);
+ CALLBACK(fragment);
+ parser->http_minor = 9;
+ state = s_req_line_almost_done;
+ break;
+ case LF:
+ CALLBACK(uri);
+ CALLBACK(fragment);
+ parser->http_minor = 9;
+ state = s_header_field_start;
+ break;
+ case '?':
+ case '#':
+ break;
+ default:
+ return ERROR;
+ }
+ break;
+ }
+
+ case s_http_start:
+ switch (ch) {
+ case 'H':
+ state = s_http_H;
+ break;
+ case ' ':
+ break;
+ default:
+ return ERROR;
+ }
+ break;
+
+ case s_http_H:
+ if (ch != 'T') return ERROR;
+ state = s_http_HT;
+ break;
+
+ case s_http_HT:
+ if (ch != 'T') return ERROR;
+ state = s_http_HTT;
+ break;
+
+ case s_http_HTT:
+ if (ch != 'P') return ERROR;
+ state = s_http_HTTP;
+ break;
+
+ case s_http_HTTP:
+ if (ch != '/') return ERROR;
+ state = s_first_major_digit;
+ break;
+
+ /* first digit of major HTTP version */
+ case s_first_major_digit:
+ if (ch < '1' || ch > '9') return ERROR;
+ parser->http_major = ch - '0';
+ state = s_major_digit;
+ break;
+
+ /* major HTTP version or dot */
+ case s_major_digit:
+ {
+ if (ch == '.') {
+ state = s_first_minor_digit;
+ break;
+ }
+
+ if (ch < '0' || ch > '9') return ERROR;
+
+ parser->http_major *= 10;
+ parser->http_major += ch - '0';
+
+ if (parser->http_major > 999) return ERROR;
+ break;
+ }
+
+ /* first digit of minor HTTP version */
+ case s_first_minor_digit:
+ if (ch < '0' || ch > '9') return ERROR;
+ parser->http_minor = ch - '0';
+ state = s_minor_digit;
+ break;
+
+ /* minor HTTP version or end of request line */
+ case s_minor_digit:
+ {
+ if (ch == CR) {
+ state = s_req_line_almost_done;
+ break;
+ }
+
+ if (ch == LF) {
+ state = s_header_field_start;
+ break;
+ }
+
+ /* XXX allow spaces after digit? */
+
+ if (ch < '0' || ch > '9') return ERROR;
+
+ parser->http_minor *= 10;
+ parser->http_minor += ch - '0';
+
+ if (parser->http_minor > 999) return ERROR;
+ break;
+ }
+
+ /* end of request line */
+ case s_req_line_almost_done:
+ {
+ if (ch != LF) return ERROR;
+ state = s_header_field_start;
+ break;
+ }
+
+ case s_header_field_start:
+ {
+ if (ch == CR) {
+ state = s_headers_almost_done;
+ break;
+ }
+
+ if (ch == LF) {
+ state = s_headers_done;
+ break;
+ }
+
+ c = LOWER(ch);
+
+ if (c < 'a' || 'z' < c) return ERROR;
+
+ MARK(header_field);
+
+ header_index = 0;
+ state = s_header_field;
+
+ switch (c) {
+ case 'c':
+ header_state = h_C;
+ break;
+
+ case 't':
+ header_state = h_matching_transfer_encoding;
+ break;
+
+ default:
+ header_state = h_general;
+ break;
+ }
+ break;
+ }
+
+ case s_header_field:
+ {
+ header_index++;
+
+ c = lowcase[(int)ch];
+
+ if (c) {
+ switch (header_state) {
+ case h_general:
+ break;
+
+ case h_C:
+ header_state = (c == 'o' ? h_CO : h_general);
+ break;
+
+ case h_CO:
+ header_state = (c == 'n' ? h_CON : h_general);
+ break;
+
+ case h_CON:
+ switch (c) {
+ case 'n':
+ header_state = h_matching_connection;
+ break;
+ case 't':
+ header_state = h_matching_content_length;
+ break;
+ default:
+ header_state = h_general;
+ break;
+ }
+ break;
+
+ /* connection */
+
+ case h_matching_connection:
+ if (header_index > sizeof(CONNECTION)-1
+ || c != CONNECTION[header_index]) {
+ header_state = h_general;
+ } else if (header_index == sizeof(CONNECTION)-1) {
+ header_state = h_connection;
+ }
+ break;
+
+ /* content-length */
+
+ case h_matching_content_length:
+ if (header_index > sizeof(CONTENT_LENGTH)-1
+ || c != CONTENT_LENGTH[header_index]) {
+ header_state = h_general;
+ } else if (header_index == sizeof(CONTENT_LENGTH)-1) {
+ header_state = h_content_length;
+ }
+ break;
+
+ /* transfer-encoding */
+
+ case h_matching_transfer_encoding:
+ if (header_index > sizeof(TRANSFER_ENCODING)-1
+ || c != TRANSFER_ENCODING[header_index]) {
+ header_state = h_general;
+ } else if (header_index == sizeof(TRANSFER_ENCODING)-1) {
+ header_state = h_transfer_encoding;
+ }
+ break;
+
+ default:
+ assert(0 && "Unknown header_state");
+ break;
+ }
+ }
+
+ if (ch == ':') {
+ CALLBACK(header_field);
+ state = s_header_value_start;
+ break;
+ }
+
+ if (ch == CR) {
+ state = s_header_almost_done;
+ CALLBACK(header_field);
+ break;
+ }
+
+ if (ch == LF) {
+ CALLBACK(header_field);
+ state = s_header_field_start;
+ break;
+ }
+
+ return ERROR;
+ }
+
+ case s_header_value_start:
+ {
+ if (ch == ' ') break;
+
+ MARK(header_value);
+
+ if (ch == CR) {
+ header_state = h_general;
+ state = s_header_almost_done;
+ }
+
+ if (ch == LF) {
+ header_state = h_general;
+ state = s_headers_done;
+ }
+
+ c = lowcase[(int)ch];
+
+ if (!c) return ERROR;
+
+ switch (header_state) {
+ case h_transfer_encoding:
+ /* looking for 'Transfer-Encoding: chunked' */
+ if ('c' == c) {
+ header_state = h_encoding_C;
+ } else {
+ header_state = h_general;
+ }
+ break;
+
+ case h_content_length:
+ if (ch < '0' || ch > '9') return ERROR;
+ parser->content_length = ch - '0';
+ break;
+
+ case h_connection:
+ /* looking for 'Connection: keep-alive' */
+ if (c == 'k') {
+ header_state = h_connection_K;
+ /* looking for 'Connection: close' */
+ } else if (c == 'c') {
+ header_state = h_connection_C;
+ } else {
+ header_state = h_general;
+ }
+ break;
+
+ default:
+ state = s_header_value;
+ header_state = h_general;
+ break;
+ }
+ break;
+ }
+
+ case s_header_value:
+ {
+ break;
+ }
+
+ case s_header_almost_done:
+ if (ch != LF) return ERROR;
+ state = s_header_field_start;
+ break;
+
+ default:
+ assert(0 && "unhandled state");
+ return ERROR;
+ }
+ }
+
+ CALLBACK(header_field);
+ CALLBACK(header_value);
+ CALLBACK(fragment);
+ CALLBACK(query_string);
+ CALLBACK(path);
+ CALLBACK(uri);
+
+ parser->state = state;
+ parser->header_state = header_state;
+ parser->header_index = header_index;
+
+ return len;
+}
+
+/*
+ * Put `parser` into its initial state for messages of the given `type`.
+ *
+ * Every field that http_parser_execute() reads before writing is given a
+ * defined value here: the state machine position, the header matching state,
+ * and the field marks (NULL means "no field in progress"). keep_alive and
+ * content_length start at -1, the value http_parser_should_keep_alive()
+ * treats as "not specified". All user callbacks are cleared; install them
+ * after calling this function.
+ *
+ * Only HTTP_REQUEST is supported; any other type trips an assertion.
+ */
+void
+http_parser_init (http_parser *parser, enum http_parser_type type)
+{
+  parser->type = type;
+
+  /* Set the state unconditionally so it is defined even when assertions
+   * are compiled out. */
+  parser->state = s_start;
+  if (type != HTTP_REQUEST) {
+    assert(0 && "responses not supported yet");
+  }
+
+  parser->header_state = h_general;
+  parser->header_index = 0;
+
+  parser->header_field_mark = NULL;
+  parser->header_field_size = 0;
+  parser->header_value_mark = NULL;
+  parser->header_value_size = 0;
+  parser->query_string_mark = NULL;
+  parser->query_string_size = 0;
+  parser->path_mark = NULL;
+  parser->path_size = 0;
+  parser->uri_mark = NULL;
+  parser->uri_size = 0;
+  parser->fragment_mark = NULL;
+  parser->fragment_size = 0;
+
+  parser->http_major = 0;
+  parser->http_minor = 0;
+  parser->keep_alive = -1;
+  parser->content_length = -1;
+
+  parser->on_message_begin = NULL;
+  parser->on_path = NULL;
+  parser->on_query_string = NULL;
+  parser->on_uri = NULL;
+  parser->on_fragment = NULL;
+  parser->on_header_field = NULL;
+  parser->on_header_value = NULL;
+  parser->on_headers_complete = NULL;
+  parser->on_body = NULL;
+  parser->on_message_complete = NULL;
+}
+
View
93 http_parser.h
@@ -1,63 +1,3 @@
-/*
-Mongrel Web Server (Mongrel) is copyrighted free software by Zed A. Shaw
-<zedshaw at zedshaw dot com> and contributors.
-
-This source file is based on Mongrel's parser. Changes by Ryan Dahl
-<ry@tinyclouds.org> in 2008 and 2009.
-
-You can redistribute it and/or modify it under either the terms of the GPL2
-or the conditions below:
-
-1. You may make and give away verbatim copies of the source form of the
- software without restriction, provided that you duplicate all of the
- original copyright notices and associated disclaimers.
-
-2. You may modify your copy of the software in any way, provided that
- you do at least ONE of the following:
-
- a) place your modifications in the Public Domain or otherwise make them
- Freely Available, such as by posting said modifications to Usenet or an
- equivalent medium, or by allowing the author to include your
- modifications in the software.
-
- b) use the modified software only within your corporation or
- organization.
-
- c) rename any non-standard executables so the names do not conflict with
- standard executables, which must also be provided.
-
- d) make other distribution arrangements with the author.
-
-3. You may distribute the software in object code or executable
- form, provided that you do at least ONE of the following:
-
- a) distribute the executables and library files of the software,
- together with instructions (in the manual page or equivalent) on where
- to get the original distribution.
-
- b) accompany the distribution with the machine-readable source of the
- software.
-
- c) give non-standard executables non-standard names, with
- instructions on where to get the original software distribution.
-
- d) make other distribution arrangements with the author.
-
-4. You may modify and include the part of the software into any other
- software (possibly commercial). But some files in the distribution
- are not written by the author, so that they are not under this terms.
-
-5. The scripts and library files supplied as input to or produced as
- output from the software do not automatically fall under the
- copyright of the software, but belong to whomever generated them,
- and may be sold commercially, and may be aggregated with this
- software.
-
-6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
- IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- PURPOSE.
-*/
#ifndef http_parser_h
#define http_parser_h
#ifdef __cplusplus
@@ -83,34 +23,20 @@ typedef int (*http_cb) (http_parser*);
/* Request Methods */
enum http_method
- { HTTP_COPY = 0x0001
- , HTTP_DELETE = 0x0002
+ { HTTP_DELETE = 0x0002
, HTTP_GET = 0x0004
, HTTP_HEAD = 0x0008
- , HTTP_LOCK = 0x0010
- , HTTP_MKCOL = 0x0020
- , HTTP_MOVE = 0x0040
- , HTTP_OPTIONS = 0x0080
, HTTP_POST = 0x0100
- , HTTP_PROPFIND = 0x0200
- , HTTP_PROPPATCH = 0x0400
, HTTP_PUT = 0x0800
- , HTTP_TRACE = 0x1000
- , HTTP_UNLOCK = 0x2000
};
enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE };
-enum http_version
- { HTTP_VERSION_OTHER = 0x00
- , HTTP_VERSION_11 = 0x01
- , HTTP_VERSION_10 = 0x02
- , HTTP_VERSION_09 = 0x04
- };
-
struct http_parser {
/** PRIVATE **/
- int cs;
+ int state;
+ int header_state;
+ size_t header_index;
enum http_parser_type type;
size_t chunk_size;
@@ -134,7 +60,10 @@ struct http_parser {
/** READ-ONLY **/
unsigned short status_code; /* responses only */
enum http_method method; /* requests only */
- enum http_version version;
+
+ int http_major;
+ int http_minor;
+
short keep_alive;
ssize_t content_length;
@@ -163,14 +92,16 @@ struct http_parser {
*/
void http_parser_init (http_parser *parser, enum http_parser_type);
-void http_parser_execute (http_parser *parser, const char *data, size_t len);
+size_t http_parser_execute (http_parser *parser, const char *data, size_t len);
+/*
int http_parser_has_error (http_parser *parser);
+*/
static inline int
http_parser_should_keep_alive (http_parser *parser)
{
- if (parser->keep_alive == -1) return (parser->version == HTTP_VERSION_11);
+ if (parser->keep_alive == -1) return (parser->http_major == 1 && parser->http_minor == 1);
return parser->keep_alive;
}
View
536 http_parser.rl
@@ -1,536 +0,0 @@
-/*
-Mongrel Web Server (Mongrel) is copyrighted free software by Zed A. Shaw
-<zedshaw at zedshaw dot com> and contributors.
-
-This source file is based on Mongrel's parser. Changes by Ryan Dahl
-<ry@tinyclouds.org> in 2008 and 2009.
-
-You can redistribute it and/or modify it under either the terms of the GPL2
-or the conditions below:
-
-1. You may make and give away verbatim copies of the source form of the
- software without restriction, provided that you duplicate all of the
- original copyright notices and associated disclaimers.
-
-2. You may modify your copy of the software in any way, provided that
- you do at least ONE of the following:
-
- a) place your modifications in the Public Domain or otherwise make them
- Freely Available, such as by posting said modifications to Usenet or an
- equivalent medium, or by allowing the author to include your
- modifications in the software.
-
- b) use the modified software only within your corporation or
- organization.
-
- c) rename any non-standard executables so the names do not conflict with
- standard executables, which must also be provided.
-
- d) make other distribution arrangements with the author.
-
-3. You may distribute the software in object code or executable
- form, provided that you do at least ONE of the following:
-
- a) distribute the executables and library files of the software,
- together with instructions (in the manual page or equivalent) on where
- to get the original distribution.
-
- b) accompany the distribution with the machine-readable source of the
- software.
-
- c) give non-standard executables non-standard names, with
- instructions on where to get the original software distribution.
-
- d) make other distribution arrangements with the author.
-
-4. You may modify and include the part of the software into any other
- software (possibly commercial). But some files in the distribution
- are not written by the author, so that they are not under this terms.
-
-5. The scripts and library files supplied as input to or produced as
- output from the software do not automatically fall under the
- copyright of the software, but belong to whomever generated them,
- and may be sold commercially, and may be aggregated with this
- software.
-
-6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
- IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- PURPOSE.
-*/
-#include "http_parser.h"
-#include <limits.h>
-#include <assert.h>
-
-/* parser->flags */
-#define EATING 0x01
-#define ERROR 0x02
-#define CHUNKED 0x04
-#define EAT_FOREVER 0x10
-
-static int unhex[] = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
- , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
- ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
- ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
- };
-
-#undef MIN
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
-
-#undef NULL
-#define NULL ((void*)(0))
-
-#define MAX_FIELD_SIZE (80*1024)
-
-#define REMAINING (unsigned long)(pe - p)
-#define CALLBACK(FOR) \
-do { \
- if (parser->FOR##_mark) { \
- parser->FOR##_size += p - parser->FOR##_mark; \
- if (parser->FOR##_size > MAX_FIELD_SIZE) { \
- parser->flags |= ERROR; \
- return; \
- } \
- if (parser->on_##FOR) { \
- callback_return_value = parser->on_##FOR(parser, \
- parser->FOR##_mark, \
- p - parser->FOR##_mark); \
- } \
- if (callback_return_value != 0) { \
- parser->flags |= ERROR; \
- return; \
- } \
- } \
-} while(0)
-
-#define RESET_PARSER(parser) \
- parser->chunk_size = 0; \
- parser->flags = 0; \
- parser->header_field_mark = NULL; \
- parser->header_value_mark = NULL; \
- parser->query_string_mark = NULL; \
- parser->path_mark = NULL; \
- parser->uri_mark = NULL; \
- parser->fragment_mark = NULL; \
- parser->status_code = 0; \
- parser->method = 0; \
- parser->version = HTTP_VERSION_OTHER; \
- parser->keep_alive = -1; \
- parser->content_length = -1; \
- parser->body_read = 0
-
-#define END_REQUEST \
-do { \
- if (parser->on_message_complete) { \
- callback_return_value = \
- parser->on_message_complete(parser); \
- } \
- RESET_PARSER(parser); \
-} while (0)
-
-#define SKIP_BODY(nskip) \
-do { \
- tmp = (nskip); \
- if (parser->on_body && tmp > 0) { \
- callback_return_value = parser->on_body(parser, p, tmp); \
- } \
- if (callback_return_value == 0) { \
- p += tmp; \
- parser->body_read += tmp; \
- parser->chunk_size -= tmp; \
- if (0 == parser->chunk_size) { \
- parser->flags &= ~EATING; \
- if (!(parser->flags & CHUNKED)) { \
- END_REQUEST; \
- } \
- } else { \
- parser->flags |= EATING; \
- } \
- } \
-} while (0)
-
-%%{
- machine http_parser;
-
- action mark_header_field {
- parser->header_field_mark = p;
- parser->header_field_size = 0;
- }
-
- action mark_header_value {
- parser->header_value_mark = p;
- parser->header_value_size = 0;
- }
-
- action mark_fragment {
- parser->fragment_mark = p;
- parser->fragment_size = 0;
- }
-
- action mark_query_string {
- parser->query_string_mark = p;
- parser->query_string_size = 0;
- }
-
- action mark_request_path {
- parser->path_mark = p;
- parser->path_size = 0;
- }
-
- action mark_request_uri {
- parser->uri_mark = p;
- parser->uri_size = 0;
- }
-
- action header_field {
- CALLBACK(header_field);
- parser->header_field_mark = NULL;
- parser->header_field_size = 0;
- }
-
- action header_value {
- CALLBACK(header_value);
- parser->header_value_mark = NULL;
- parser->header_value_size = 0;
- }
-
- action request_uri {
- CALLBACK(uri);
- parser->uri_mark = NULL;
- parser->uri_size = 0;
- }
-
- action fragment {
- CALLBACK(fragment);
- parser->fragment_mark = NULL;
- parser->fragment_size = 0;
- }
-
- action query_string {
- CALLBACK(query_string);
- parser->query_string_mark = NULL;
- parser->query_string_size = 0;
- }
-
- action request_path {
- CALLBACK(path);
- parser->path_mark = NULL;
- parser->path_size = 0;
- }
-
- action headers_complete {
- if(parser->on_headers_complete) {
- callback_return_value = parser->on_headers_complete(parser);
- if (callback_return_value != 0) {
- parser->flags |= ERROR;
- return;
- }
- }
- }
-
- action begin_message {
- if(parser->on_message_begin) {
- callback_return_value = parser->on_message_begin(parser);
- if (callback_return_value != 0) {
- parser->flags |= ERROR;
- return;
- }
- }
- }
-
- action content_length {
- if (parser->content_length == -1) parser->content_length = 0;
- if (parser->content_length > INT_MAX) {
- parser->flags |= ERROR;
- return;
- }
- parser->content_length *= 10;
- parser->content_length += *p - '0';
- }
-
- action status_code {
- parser->status_code *= 10;
- parser->status_code += *p - '0';
- }
-
- action use_chunked_encoding { parser->flags |= CHUNKED; }
-
- action set_keep_alive { parser->keep_alive = 1; }
- action set_not_keep_alive { parser->keep_alive = 0; }
-
- action version_11 { parser->version = HTTP_VERSION_11; }
- action version_10 { parser->version = HTTP_VERSION_10; }
- action version_09 { parser->version = HTTP_VERSION_09; }
-
- action add_to_chunk_size {
- parser->chunk_size *= 16;
- parser->chunk_size += unhex[(int)*p];
- }
-
- action skip_chunk_data {
- SKIP_BODY(MIN(parser->chunk_size, REMAINING));
- if (callback_return_value != 0) {
- parser->flags |= ERROR;
- return;
- }
-
- fhold;
- if (parser->chunk_size > REMAINING) {
- fbreak;
- } else {
- fgoto chunk_end;
- }
- }
-
- action end_chunked_body {
- END_REQUEST;
- if (parser->type == HTTP_REQUEST) {
- fnext Requests;
- } else {
- fnext Responses;
- }
- }
-
- action body_logic {
- if (parser->flags & CHUNKED) {
- fnext ChunkedBody;
- } else {
- /* this is pretty stupid. i'd prefer to combine this with
- * skip_chunk_data */
- if (parser->content_length < 0) {
- /* If we didn't get a content length; if not keep-alive
- * just read body until EOF */
- if (!http_parser_should_keep_alive(parser)) {
- parser->flags |= EAT_FOREVER;
- parser->chunk_size = REMAINING;
- } else {
- /* Otherwise, if keep-alive, then assume the message
- * has no body. */
- parser->chunk_size = parser->content_length = 0;
- }
- } else {
- parser->chunk_size = parser->content_length;
- }
- p += 1;
-
- SKIP_BODY(MIN(REMAINING, parser->chunk_size));
-
- if (callback_return_value != 0) {
- parser->flags |= ERROR;
- return;
- }
-
- fhold;
- if(parser->chunk_size > REMAINING) {
- fbreak;
- }
- }
- }
-
- CRLF = "\r\n";
-
-# character types
- CTL = (cntrl | 127);
- safe = ("$" | "-" | "_" | ".");
- extra = ("!" | "*" | "'" | "(" | ")" | ",");
- reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
- unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
- national = any -- (alpha | digit | reserved | extra | safe | unsafe);
- unreserved = (alpha | digit | safe | extra | national);
- escape = ("%" xdigit xdigit);
- uchar = (unreserved | escape | "\"");
- pchar = (uchar | ":" | "@" | "&" | "=" | "+");
- tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\""
- | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
-
-# elements
- token = (ascii -- (CTL | tspecials));
- quote = "\"";
-# qdtext = token -- "\"";
-# quoted_pair = "\" ascii;
-# quoted_string = "\"" (qdtext | quoted_pair )* "\"";
-
-# headers
-
- Method = ( "COPY" %{ parser->method = HTTP_COPY; }
- | "DELETE" %{ parser->method = HTTP_DELETE; }
- | "GET" %{ parser->method = HTTP_GET; }
- | "HEAD" %{ parser->method = HTTP_HEAD; }
- | "LOCK" %{ parser->method = HTTP_LOCK; }
- | "MKCOL" %{ parser->method = HTTP_MKCOL; }
- | "MOVE" %{ parser->method = HTTP_MOVE; }
- | "OPTIONS" %{ parser->method = HTTP_OPTIONS; }
- | "POST" %{ parser->method = HTTP_POST; }
- | "PROPFIND" %{ parser->method = HTTP_PROPFIND; }
- | "PROPPATCH" %{ parser->method = HTTP_PROPPATCH; }
- | "PUT" %{ parser->method = HTTP_PUT; }
- | "TRACE" %{ parser->method = HTTP_TRACE; }
- | "UNLOCK" %{ parser->method = HTTP_UNLOCK; }
- ); # Not allowing extension methods
-
- HTTP_Version = "HTTP/" ( "1.1" %version_11
- | "1.0" %version_10
- | "0.9" %version_09
- | (digit "." digit)
- );
-
- scheme = ( alpha | digit | "+" | "-" | "." )* ;
- absolute_uri = (scheme ":" (uchar | reserved )*);
- path = ( pchar+ ( "/" pchar* )* ) ;
- query = ( uchar | reserved )* >mark_query_string %query_string ;
- param = ( pchar | "/" )* ;
- params = ( param ( ";" param )* ) ;
- rel_path = ( path? (";" params)? ) ;
- absolute_path = ( "/"+ rel_path ) >mark_request_path %request_path ("?" query)?;
- Request_URI = ( "*" | absolute_uri | absolute_path ) >mark_request_uri %request_uri;
- Fragment = ( uchar | reserved )* >mark_fragment %fragment;
-
- field_name = ( token -- ":" )+;
- Field_Name = field_name >mark_header_field %header_field;
-
- field_value = ((any - " ") any*)?;
- Field_Value = field_value >mark_header_value %header_value;
-
- hsep = ":" " "*;
- header = (field_name hsep field_value) :> CRLF;
- Header = ( ("Content-Length"i hsep digit+ $content_length)
- | ("Connection"i hsep
- ( "Keep-Alive"i %set_keep_alive
- | "close"i %set_not_keep_alive
- )
- )
- | ("Transfer-Encoding"i hsep "chunked"i %use_chunked_encoding)
- | (Field_Name hsep Field_Value)
- ) :> CRLF;
-
- Headers = (Header)* :> CRLF @headers_complete;
-
- Request_Line = ( Method " " Request_URI ("#" Fragment)? " " HTTP_Version CRLF ) ;
-
- StatusCode = (digit digit digit) $status_code;
- ReasonPhrase = ascii* -- ("\r" | "\n");
- StatusLine = HTTP_Version " " StatusCode (" " ReasonPhrase)? CRLF;
-
-# chunked message
- trailing_headers = header*;
- #chunk_ext_val = token | quoted_string;
- chunk_ext_val = token*;
- chunk_ext_name = token*;
- chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*;
- last_chunk = "0"+ ( chunk_extension | " "+) CRLF;
- chunk_size = (xdigit* [1-9a-fA-F] xdigit* ) $add_to_chunk_size;
- chunk_end = CRLF;
- chunk_body = any >skip_chunk_data;
- chunk_begin = chunk_size ( chunk_extension | " "+ ) CRLF;
- chunk = chunk_begin chunk_body chunk_end;
- ChunkedBody := chunk* last_chunk trailing_headers CRLF @end_chunked_body;
-
- Request = (Request_Line Headers) >begin_message @body_logic;
- Response = (StatusLine Headers) >begin_message @body_logic;
-
- Requests := Request*;
- Responses := Response*;
-
- main := any >{
- fhold;
- if (parser->type == HTTP_REQUEST) {
- fgoto Requests;
- } else {
- fgoto Responses;
- }
- };
-
-}%%
-
-%% write data;
-
-void
-http_parser_init (http_parser *parser, enum http_parser_type type)
-{
- int cs = 0;
- %% write init;
- parser->cs = cs;
- parser->type = type;
-
- parser->on_message_begin = NULL;
- parser->on_path = NULL;
- parser->on_query_string = NULL;
- parser->on_uri = NULL;
- parser->on_fragment = NULL;
- parser->on_header_field = NULL;
- parser->on_header_value = NULL;
- parser->on_headers_complete = NULL;
- parser->on_body = NULL;
- parser->on_message_complete = NULL;
-
- RESET_PARSER(parser);
-}
-
-/** exec **/
-void
-http_parser_execute (http_parser *parser, const char *buffer, size_t len)
-{
- size_t tmp; // REMOVE ME this is extremely hacky
- int callback_return_value = 0;
- const char *p, *pe, *eof;
- int cs = parser->cs;
-
- p = buffer;
- pe = buffer+len;
- eof = len ? NULL : pe;
-
- if (parser->flags & EAT_FOREVER) {
- if (len == 0) {
- if (parser->on_message_complete) {
- callback_return_value = parser->on_message_complete(parser);
- if (callback_return_value != 0) parser->flags |= ERROR;
- }
- } else {
- if (parser->on_body) {
- callback_return_value = parser->on_body(parser, p, len);
- if (callback_return_value != 0) parser->flags |= ERROR;
- }
- }
- return;
- }
-
- if (0 < parser->chunk_size && (parser->flags & EATING)) {
- /* eat body */
- SKIP_BODY(MIN(len, parser->chunk_size));
- if (callback_return_value != 0) {
- parser->flags |= ERROR;
- return;
- }
- }
-
- if (parser->header_field_mark) parser->header_field_mark = buffer;
- if (parser->header_value_mark) parser->header_value_mark = buffer;
- if (parser->fragment_mark) parser->fragment_mark = buffer;
- if (parser->query_string_mark) parser->query_string_mark = buffer;
- if (parser->path_mark) parser->path_mark = buffer;
- if (parser->uri_mark) parser->uri_mark = buffer;
-
- %% write exec;
-
- parser->cs = cs;
-
- CALLBACK(header_field);
- CALLBACK(header_value);
- CALLBACK(fragment);
- CALLBACK(query_string);
- CALLBACK(path);
- CALLBACK(uri);
-
- assert(p <= pe && "buffer overflow after parsing execute");
-}
-
-int
-http_parser_has_error (http_parser *parser)
-{
- if (parser->flags & ERROR) return 1;
- return parser->cs == http_parser_error;
-}
View
109 test.c
@@ -653,10 +653,8 @@ parse_messages (int message_count, const struct message *input_messages[])
parser_init(HTTP_REQUEST);
http_parser_execute(&parser, total, length);
- assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, NULL, 0);
- assert(!http_parser_has_error(&parser));
assert(num_messages == message_count);
@@ -665,32 +663,86 @@ parse_messages (int message_count, const struct message *input_messages[])
}
}
+static void
+print_error (const struct message *message, size_t error_location)
+{
+ printf("\n*** parse error on '%s' ***\n\n", message->name);
+
+ int this_line = 0, char_len = 0;
+ size_t i, j, len = strlen(message->raw), error_location_line = 0;
+ for (i = 0; i < len; i++) {
+ if (i == error_location) this_line = 1;
+ switch (message->raw[i]) {
+ case '\r':
+ char_len = 2;
+ printf("\\r");
+ break;
+
+ case '\n':
+ char_len = 2;
+ printf("\\n\n");
+
+ if (this_line) {
+ for (j = 0; j < error_location_line; j++) {
+ putchar(' ');
+ }
+ printf("^\n\nerror location: %d\n", error_location);
+ return;
+ }
+
+ error_location_line = 0;
+ continue;
+
+ default:
+ char_len = 1;
+ putchar(message->raw[i]);
+ break;
+ }
+ if (!this_line) error_location_line += char_len;
+ }
+}
+
void
test_message (const struct message *message)
{
parser_init(message->type);
- http_parser_execute(&parser, message->raw, strlen(message->raw));
- assert(!http_parser_has_error(&parser));
+ size_t read;
- http_parser_execute(&parser, NULL, 0);
- assert(!http_parser_has_error(&parser));
+ read = http_parser_execute(&parser, message->raw, strlen(message->raw));
+ if (read != strlen(message->raw)) {
+ print_error(message, read);
+ exit(1);
+ }
+
+ read = http_parser_execute(&parser, NULL, 0);
+ if (read != 0) {
+ print_error(message, read);
+ exit(1);
+ }
assert(num_messages == 1);
message_eq(0, message);
}
-void
+int
test_error (const char *buf)
{
parser_init(HTTP_REQUEST);
- http_parser_execute(&parser, buf, strlen(buf));
- http_parser_execute(&parser, NULL, 0);
+ size_t parsed;
+
+ parsed = http_parser_execute(&parser, buf, strlen(buf));
+ if (parsed != strlen(buf)) return 1;
+ parsed = http_parser_execute(&parser, NULL, 0);
+ if (parsed != 0) return 1;
+
+ printf("No error found in the following: %s\n", buf);
+ exit(1);
- assert(http_parser_has_error(&parser));
+ return 0;
}
void
@@ -710,10 +762,8 @@ test_multiple3 (const struct message *r1, const struct message *r2, const struct
parser_init(HTTP_REQUEST);
http_parser_execute(&parser, total, strlen(total));
- assert(!http_parser_has_error(&parser) );
http_parser_execute(&parser, NULL, 0);
- assert(!http_parser_has_error(&parser) );
assert(num_messages == 3);
message_eq(0, r1);
@@ -773,16 +823,12 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
*/
http_parser_execute(&parser, buf1, buf1_len);
- assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, buf2, buf2_len);
- assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, buf3, buf3_len);
- assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, NULL, 0);
- assert(!http_parser_has_error(&parser));
assert(3 == num_messages);
@@ -797,8 +843,6 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
int
main (void)
{
- int i, j, k;
-
printf("sizeof(http_parser) = %d\n", sizeof(http_parser));
int request_count;
@@ -808,18 +852,6 @@ main (void)
for (response_count = 0; responses[response_count].name; response_count++);
- //// RESPONSES
-
- for (i = 0; i < response_count; i++) {
- test_message(&responses[i]);
- }
-
-
-
- puts("responses okay");
-
-
-
/// REQUESTS
@@ -871,15 +903,19 @@ main (void)
"HELLO";
test_error(bad_get_no_headers_no_body);
-
/* TODO sending junk and large headers gets rejected */
/* check to make sure our predefined requests are okay */
+ int i;
for (i = 0; requests[i].name; i++) {
test_message(&requests[i]);
}
+#if 0
+ int j, k;
+
+
for (i = 0; i < request_count; i++) {
for (j = 0; j < request_count; j++) {
for (k = 0; k < request_count; k++) {
@@ -910,5 +946,16 @@ main (void)
puts("requests okay");
+ //// RESPONSES
+
+ for (i = 0; i < response_count; i++) {
+ test_message(&responses[i]);
+ }
+
+
+
+ puts("responses okay");
+
+#endif
return 0;
}

6 comments on commit 433202d

@erichocean

Happy to see you going by hand, but the Ragel parser had the advantage of the graphical dot state machine output. Are you maintaining a similar FSM for this version, and if so, would you please copy the .dot file into the repository? Thanks!

@ry

no, unfortunately not.

@temoto

And the new parser will not support custom methods?

@ry

no

@claudiusaiz

Hello,

I noticed that lines 771-781 in http_parser.c allow an HTTP header to contain only the field name, without the ':'.
For example, we could have a header like this:

"Accept-Encoding\r\n"

But, section 4.2 of HTTP RFC 2616 specifies that an HTTP header is defined like this:

message-header = field-name ":" [ field-value ]

which makes the ':' character compulsory.

I am having some problems with this, and I was thinking of commenting out lines 771-781. Does anyone happen to know if this would have negative effects on the rest of the parser's functionality?

Thanks,
Claudiu

@bnoordhuis

I wager you're looking at an old version of http-parser. With the current master (8081938) the logic you describe is around line 1300. But yeah, it will accept headers without values for the sake of interoperability (think: buggy servers.)

Please sign in to comment.
Something went wrong with that request. Please try again.