Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

new version

Trashing the old Ragel parser (which was based on Mongrel) because it's
proving difficult to get the control I need in end-of-message cases.
Replacing it with a hand-written parser using a couple of tricks borrowed
from NGINX. The new parser will be much more work to write, but should prove
faster and allow for better hacking.
  • Loading branch information...
commit 433202d825fa34de5e42f810bb984ece05a36d20 1 parent 6bfd5bf
ry ry authored
1  .gitignore
View
@@ -1,4 +1,3 @@
tags
*.o
test
-http_parser.c
11 Makefile
View
@@ -1,5 +1,5 @@
-#OPT=-O0 -g -Wall -Wextra -Werror
-OPT=-O2
+OPT=-O0 -g -Wall -Wextra -Werror
+#OPT=-O2
test: http_parser.o test.c
gcc $(OPT) http_parser.o test.c -o $@
@@ -7,10 +7,7 @@ test: http_parser.o test.c
http_parser.o: http_parser.c http_parser.h Makefile
gcc $(OPT) -c http_parser.c
-http_parser.c: http_parser.rl Makefile
- ragel -s -G2 http_parser.rl -o $@
-
-tags: http_parser.rl http_parser.h test.c
+tags: http_parser.c http_parser.h test.c
ctags $^
clean:
@@ -18,7 +15,7 @@ clean:
package: http_parser.c
@rm -rf /tmp/http_parser && mkdir /tmp/http_parser && \
- cp LICENSE README.md Makefile http_parser.c http_parser.rl \
+ cp LICENSE README.md Makefile http_parser.c \
http_parser.h test.c /tmp/http_parser && \
cd /tmp && \
tar -cf http_parser.tar http_parser/
891 http_parser.c
View
@@ -0,0 +1,891 @@
+#include "http_parser.h"
+#include <assert.h>
+#include <stdint.h>
+
+#ifndef NULL
+# define NULL ((void*)0)
+#endif
+
+#define MAX_FIELD_SIZE (80*1024)
+
+/*
+ * Begin collecting field FOR at the current byte: restart its running
+ * length and remember where it starts. Expects `parser` and `p` in scope.
+ */
+#define MARK(FOR)                                                    \
+do {                                                                 \
+  parser->FOR##_size = 0;                                            \
+  parser->FOR##_mark = p;                                            \
+} while (0)
+
+/*
+ * Run FOR##_callback on the bytes collected so far. If it reports failure
+ * (non-zero), leave the enclosing function, returning the offset of the
+ * current byte. Expects `parser`, `p` and `data` in scope.
+ */
+#define CALLBACK(FOR)                                                \
+do {                                                                 \
+  if (FOR##_callback(parser, p) != 0) {                              \
+    return (p - data);                                               \
+  }                                                                  \
+} while (0)
+
+
+/*
+ * Disabled: macro-body form of the per-field callback (mark check, size
+ * limit, user callback). The *_callback functions below do the same work.
+ * There is no #define line in front of it, so it cannot be re-enabled as
+ * written.
+ */
+#if 0
+do { \
+ if (parser->FOR##_mark) { \
+ parser->FOR##_size += p - parser->FOR##_mark; \
+ if (parser->FOR##_size > MAX_FIELD_SIZE) { \
+ return ERROR; \
+ } \
+ if (parser->on_##FOR) { \
+ if (0 != parser->on_##FOR(parser, \
+ parser->FOR##_mark, \
+ p - parser->FOR##_mark)) \
+ { \
+ return ERROR; \
+ } \
+ } \
+ } \
+} while(0)
+#endif
+
+/*
+ * Hand the bytes between parser->uri_mark and p to the on_uri handler.
+ *
+ * The span is first added to parser->uri_size; once that running total
+ * exceeds MAX_FIELD_SIZE the function returns -1 without calling the
+ * handler. Returns 0 when no URI is being collected (mark is NULL) or no
+ * handler is installed; otherwise returns whatever the handler returns.
+ */
+static inline int uri_callback (http_parser *parser, const char *p)
+{
+  const char *mark = parser->uri_mark;
+
+  /* May be invoked while no URI is open (e.g. the end-of-buffer flush). */
+  if (mark == NULL) return 0;
+
+  parser->uri_size += p - mark;
+  if (parser->uri_size > MAX_FIELD_SIZE) return -1;
+  if (parser->on_uri == NULL) return 0;
+  return parser->on_uri(parser, mark, p - mark);
+}
+
+/*
+ * Hand the bytes between parser->path_mark and p to the on_path handler.
+ *
+ * The span is first added to parser->path_size; once that running total
+ * exceeds MAX_FIELD_SIZE the function returns -1 without calling the
+ * handler. Returns 0 when no path is being collected (mark is NULL) or no
+ * handler is installed; otherwise returns whatever the handler returns.
+ */
+static inline int path_callback (http_parser *parser, const char *p)
+{
+  const char *mark = parser->path_mark;
+
+  /* May be invoked while no path is open (e.g. the end-of-buffer flush). */
+  if (mark == NULL) return 0;
+
+  parser->path_size += p - mark;
+  if (parser->path_size > MAX_FIELD_SIZE) return -1;
+  if (parser->on_path == NULL) return 0;
+  return parser->on_path(parser, mark, p - mark);
+}
+
+/*
+ * Hand the bytes between parser->query_string_mark and p to the
+ * on_query_string handler.
+ *
+ * The span is first added to parser->query_string_size; once that running
+ * total exceeds MAX_FIELD_SIZE the function returns -1 without calling the
+ * handler. Returns 0 when no query string is being collected (mark is NULL)
+ * or no handler is installed; otherwise returns whatever the handler
+ * returns.
+ */
+static inline int query_string_callback (http_parser *parser, const char *p)
+{
+  const char *mark = parser->query_string_mark;
+
+  /* May be invoked while no query string is open (end-of-buffer flush). */
+  if (mark == NULL) return 0;
+
+  parser->query_string_size += p - mark;
+  if (parser->query_string_size > MAX_FIELD_SIZE) return -1;
+  if (parser->on_query_string == NULL) return 0;
+  return parser->on_query_string(parser, mark, p - mark);
+}
+
+/*
+ * Hand the bytes between parser->fragment_mark and p to the on_fragment
+ * handler.
+ *
+ * The span is first added to parser->fragment_size; once that running total
+ * exceeds MAX_FIELD_SIZE the function returns -1 without calling the
+ * handler. Returns 0 when no fragment is being collected (mark is NULL) or
+ * no handler is installed; otherwise returns whatever the handler returns.
+ */
+static inline int fragment_callback (http_parser *parser, const char *p)
+{
+  const char *mark = parser->fragment_mark;
+
+  /* May be invoked while no fragment is open (e.g. end-of-buffer flush). */
+  if (mark == NULL) return 0;
+
+  parser->fragment_size += p - mark;
+  if (parser->fragment_size > MAX_FIELD_SIZE) return -1;
+  if (parser->on_fragment == NULL) return 0;
+  return parser->on_fragment(parser, mark, p - mark);
+}
+
+/*
+ * Hand the bytes between parser->header_field_mark and p to the
+ * on_header_field handler.
+ *
+ * The span is first added to parser->header_field_size; once that running
+ * total exceeds MAX_FIELD_SIZE the function returns -1 without calling the
+ * handler. Returns 0 when no header name is being collected (mark is NULL)
+ * or no handler is installed; otherwise returns whatever the handler
+ * returns.
+ */
+static inline int header_field_callback (http_parser *parser, const char *p)
+{
+  const char *mark = parser->header_field_mark;
+
+  /* May be invoked while no header name is open (end-of-buffer flush). */
+  if (mark == NULL) return 0;
+
+  parser->header_field_size += p - mark;
+  if (parser->header_field_size > MAX_FIELD_SIZE) return -1;
+  if (parser->on_header_field == NULL) return 0;
+  return parser->on_header_field(parser, mark, p - mark);
+}
+
+/*
+ * Hand the bytes between parser->header_value_mark and p to the
+ * on_header_value handler.
+ *
+ * The span is first added to parser->header_value_size; once that running
+ * total exceeds MAX_FIELD_SIZE the function returns -1 without calling the
+ * handler. Returns 0 when no header value is being collected (mark is NULL)
+ * or no handler is installed; otherwise returns whatever the handler
+ * returns.
+ */
+static inline int header_value_callback (http_parser *parser, const char *p)
+{
+  const char *mark = parser->header_value_mark;
+
+  /* May be invoked while no header value is open (end-of-buffer flush). */
+  if (mark == NULL) return 0;
+
+  parser->header_value_size += p - mark;
+  if (parser->header_value_size > MAX_FIELD_SIZE) return -1;
+  if (parser->on_header_value == NULL) return 0;
+  return parser->on_header_value(parser, mark, p - mark);
+}
+
+/*
+ * Header names the parser recognises. They are spelled in lower case
+ * because incoming bytes are compared after being mapped through lowcase[].
+ */
+#define CONNECTION "connection"
+#define CONTENT_LENGTH "content-length"
+#define TRANSFER_ENCODING "transfer-encoding"
+
+
+/*
+ * 256-entry map indexed by byte value. Letters map to their lower-case
+ * form, digits and '-' map to themselves, and every other byte maps to 0.
+ * The string is split before "0123456789" so the preceding "\0" is not
+ * read as part of a longer octal escape.
+ */
+static const unsigned char lowcase[] =
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0-\0\0" "0123456789\0\0\0\0\0\0"
+ "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
+ "\0abcdefghijklmnopqrstuvwxyz\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
+ "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
+
+
+/*
+ * Bitmap of "usual" URL bytes, one bit per byte value: byte b is usual when
+ * bit (b & 0x1f) of usual[b >> 5] is set. Cleared bits are NUL, LF and CR in
+ * the first word and ' ', '#', '%', '+', '.', '/', '?' in the second.
+ */
+static const uint32_t usual[] = {
+ 0xffffdbfe, /* 1111 1111 1111 1111 1101 1011 1111 1110 */
+
+ /* ?>=< ;:98 7654 3210 /.-, +*)( '&%$ #"!  */
+ 0x7fff37d6, /* 0111 1111 1111 1111 0011 0111 1101 0110 */
+
+ /* _^]\ [ZYX WVUT SRQP ONML KJIH GFED CBA@ */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ /* ~}| {zyx wvut srqp onml kjih gfed cba` */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff, /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+ 0xffffffff /* 1111 1111 1111 1111 1111 1111 1111 1111 */
+};
+
+/*
+ * States of the request parser. The value is stored in parser->state
+ * between calls, so s_start must stay 0 and the order must not change.
+ */
+enum state {
+  s_start = 0,
+
+  /* request method, one state per letter already matched */
+  s_method_G,
+  s_method_GE,
+
+  s_method_P,
+  s_method_PU,
+  s_method_PO,
+  s_method_POS,
+
+  s_method_H,
+  s_method_HE,
+  s_method_HEA,
+
+  s_method_D,
+  s_method_DE,
+  s_method_DEL,
+  s_method_DELE,
+  s_method_DELET,
+
+  s_spaces_before_uri,
+
+  /* absolute URI prefix: schema "://" host [":" port] */
+  s_schema,
+  s_schema_slash,
+  s_schema_slash_slash,
+  s_host,
+  s_port,
+
+  s_path,
+  s_query_string,
+  s_fragment,
+
+  /* "HTTP/" literal */
+  s_http_start,
+  s_http_H,
+  s_http_HT,
+  s_http_HTT,
+  s_http_HTTP,
+
+  /* version digits */
+  s_first_major_digit,
+  s_major_digit,
+  s_first_minor_digit,
+  s_minor_digit,
+
+  s_req_line_almost_done,
+
+  /* header lines */
+  s_header_field_start,
+  s_header_field,
+  s_header_value_start,
+  s_header_value,
+
+  s_header_almost_done,
+
+  s_headers_almost_done,
+  s_headers_done
+};
+
+/*
+ * Progress of recognising one of the known headers while a header line is
+ * read. Stored in parser->header_state between calls; h_general must stay 0
+ * and the order must not change.
+ */
+enum header_states {
+  h_general = 0,
+
+  /* shared prefix of "connection" and "content-length" */
+  h_C,
+  h_CO,
+  h_CON,
+
+  /* name still matches so far */
+  h_matching_connection,
+  h_matching_content_length,
+  h_matching_transfer_encoding,
+
+  /* name matched completely */
+  h_connection,
+  h_content_length,
+  h_transfer_encoding,
+
+  /* first letter of an interesting value seen */
+  h_encoding_C,
+  h_connection_K,
+  h_connection_C
+};
+
+#define ERROR (p - data);
+#define CR '\r'
+#define LF '\n'
+#define LOWER(c) (unsigned char)(c | 0x20)
+
+size_t http_parser_execute (http_parser *parser, const char *data, size_t len)
+{
+ char c, ch;
+ const char *p, *pe;
+
+ enum state state = parser->state;
+ enum header_states header_state = parser->header_state;
+ size_t header_index = parser->header_index;
+
+ if (parser->header_field_mark) parser->header_field_mark = data;
+ if (parser->header_value_mark) parser->header_value_mark = data;
+ if (parser->fragment_mark) parser->fragment_mark = data;
+ if (parser->query_string_mark) parser->query_string_mark = data;
+ if (parser->path_mark) parser->path_mark = data;
+ if (parser->uri_mark) parser->uri_mark = data;
+
+ for (p=data, pe=data+len; p != pe; p++) {
+ ch = *p;
+ switch (state) {
+ case s_start:
+ {
+ switch (ch) {
+ /* GET */
+ case 'G':
+ state = s_method_G;
+ break;
+
+ /* POST, PUT */
+ case 'P':
+ state = s_method_P;
+ break;
+
+ /* HEAD */
+ case 'H':
+ state = s_method_H;
+ break;
+
+ /* DELETE */
+ case 'D':
+ state = s_method_D;
+ break;
+
+ case CR:
+ case LF:
+ break;
+
+ default:
+ return ERROR;
+ }
+ break;
+ }
+
+ /* GET */
+
+ case s_method_G:
+ if (ch != 'E') return ERROR;
+ state = s_method_GE;
+ break;
+
+ case s_method_GE:
+ if (ch != 'T') return ERROR;
+ parser->method = HTTP_GET;
+ state = s_spaces_before_uri;
+ break;
+
+ /* HEAD */
+
+ case s_method_H:
+ if (ch != 'E') return ERROR;
+ state = s_method_HE;
+ break;
+
+ case s_method_HE:
+ if (ch != 'A') return ERROR;
+ state = s_method_HEA;
+ break;
+
+ case s_method_HEA:
+ if (ch != 'D') return ERROR;
+ parser->method = HTTP_HEAD;
+ state = s_spaces_before_uri;
+ break;
+
+ /* POST, PUT */
+
+ case s_method_P:
+ switch (ch) {
+ case 'O':
+ state = s_method_PO;
+ break;
+
+ case 'U':
+ state = s_method_PU;
+ break;
+
+ default:
+ return ERROR;
+ }
+ break;
+
+ /* PUT */
+
+ case s_method_PU:
+ if (ch != 'T') return ERROR;
+ parser->method = HTTP_PUT;
+ state = s_spaces_before_uri;
+ break;
+
+ /* POST */
+
+ case s_method_PO:
+ if (ch != 'S') return ERROR;
+ state = s_method_POS;
+ break;
+
+ case s_method_POS:
+ if (ch != 'T') return ERROR;
+ parser->method = HTTP_POST;
+ state = s_spaces_before_uri;
+ break;
+
+ /* DELETE */
+
+ case s_method_D:
+ if (ch != 'E') return ERROR;
+ state = s_method_DE;
+ break;
+
+ case s_method_DE:
+ if (ch != 'L') return ERROR;
+ state = s_method_DEL;
+ break;
+
+ case s_method_DEL:
+ if (ch != 'E') return ERROR;
+ state = s_method_DELE;
+ break;
+
+ case s_method_DELE:
+ if (ch != 'T') return ERROR;
+ state = s_method_DELET;
+ break;
+
+ case s_method_DELET:
+ if (ch != 'E') return ERROR;
+ parser->method = HTTP_DELETE;
+ state = s_spaces_before_uri;
+ break;
+
+
+ case s_spaces_before_uri:
+ {
+ if (ch == ' ') break;
+
+ if (ch == '/') {
+ MARK(uri);
+ MARK(path);
+ state = s_path;
+ break;
+ }
+
+ c = LOWER(ch);
+
+ if (c >= 'a' && c <= 'z') {
+ MARK(uri);
+ state = s_schema;
+ break;
+ }
+
+ return ERROR;
+ }
+
+ case s_schema:
+ {
+ c = LOWER(ch);
+
+ if (c >= 'a' && c <= 'z') break;
+
+ if (ch == ':') {
+ state = s_schema_slash;
+ break;
+ }
+
+ return ERROR;
+ }
+
+ case s_schema_slash:
+ if (ch != '/') return ERROR;
+ state = s_schema_slash_slash;
+ break;
+
+ case s_schema_slash_slash:
+ if (ch != '/') return ERROR;
+ state = s_host;
+ break;
+
+ case s_host:
+ {
+ c = LOWER(ch);
+ if (c >= 'a' && c <= 'z') break;
+ if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
+ switch (ch) {
+ case ':':
+ state = s_port;
+ break;
+ case '/':
+ MARK(path);
+ state = s_path;
+ break;
+ case ' ':
+ /* The request line looks like:
+ * "GET http://foo.bar.com HTTP/1.1"
+ * That is, there is no path.
+ */
+ CALLBACK(uri);
+ state = s_http_start;
+ break;
+ default:
+ return ERROR;
+ }
+ break;
+ }
+
+ case s_port:
+ {
+ if (ch >= '0' && ch <= '9') break;
+ switch (ch) {
+ case '/':
+ MARK(path);
+ state = s_path;
+ break;
+ case ' ':
+ /* The request line looks like:
+ * "GET http://foo.bar.com:1234 HTTP/1.1"
+ * That is, there is no path.
+ */
+ CALLBACK(uri);
+ state = s_http_start;
+ break;
+ default:
+ return ERROR;
+ }
+ break;
+ }
+
+ case s_path:
+ {
+ if (usual[ch >> 5] & (1 << (ch & 0x1f))) break;
+
+ switch (ch) {
+ case ' ':
+ CALLBACK(uri);
+ CALLBACK(path);
+ state = s_http_start;
+ break;
+ case CR:
+ CALLBACK(uri);
+ CALLBACK(path);
+ parser->http_minor = 9;
+ state = s_req_line_almost_done;
+ break;
+ case LF:
+ CALLBACK(uri);
+ CALLBACK(path);
+ parser->http_minor = 9;
+ state = s_header_field_start;
+ break;
+ case '?':
+ CALLBACK(path);
+ MARK(query_string);
+ state = s_query_string;
+ break;
+ case '#':
+ CALLBACK(path);
+ MARK(fragment);
+ state = s_fragment;
+ break;
+ default:
+ return ERROR;
+ }
+ break;
+ }
+
+ case s_query_string:
+ {
+ if (usual[ch >> 5] & (1 << (ch & 0x1f))) break;
+
+ switch (ch) {
+ case ' ':
+ CALLBACK(uri);
+ CALLBACK(query_string);
+ state = s_http_start;
+ break;
+ case CR:
+ CALLBACK(uri);
+ CALLBACK(query_string);
+ parser->http_minor = 9;
+ state = s_req_line_almost_done;
+ break;
+ case LF:
+ CALLBACK(uri);
+ CALLBACK(query_string);
+ parser->http_minor = 9;
+ state = s_header_field_start;
+ break;
+ case '#':
+ CALLBACK(query_string);
+ MARK(fragment);
+ state = s_fragment;
+ break;
+ default:
+ return ERROR;
+ }
+ break;
+ }
+
+ case s_fragment:
+ {
+ if (usual[ch >> 5] & (1 << (ch & 0x1f))) break;
+
+ switch (ch) {
+ case ' ':
+ CALLBACK(uri);
+ CALLBACK(fragment);
+ state = s_http_start;
+ break;
+ case CR:
+ CALLBACK(uri);
+ CALLBACK(fragment);
+ parser->http_minor = 9;
+ state = s_req_line_almost_done;
+ break;
+ case LF:
+ CALLBACK(uri);
+ CALLBACK(fragment);
+ parser->http_minor = 9;
+ state = s_header_field_start;
+ break;
+ case '?':
+ case '#':
+ break;
+ default:
+ return ERROR;
+ }
+ break;
+ }
+
+ case s_http_start:
+ switch (ch) {
+ case 'H':
+ state = s_http_H;
+ break;
+ case ' ':
+ break;
+ default:
+ return ERROR;
+ }
+ break;
+
+ case s_http_H:
+ if (ch != 'T') return ERROR;
+ state = s_http_HT;
+ break;
+
+ case s_http_HT:
+ if (ch != 'T') return ERROR;
+ state = s_http_HTT;
+ break;
+
+ case s_http_HTT:
+ if (ch != 'P') return ERROR;
+ state = s_http_HTTP;
+ break;
+
+ case s_http_HTTP:
+ if (ch != '/') return ERROR;
+ state = s_first_major_digit;
+ break;
+
+ /* first digit of major HTTP version */
+ case s_first_major_digit:
+ if (ch < '1' || ch > '9') return ERROR;
+ parser->http_major = ch - '0';
+ state = s_major_digit;
+ break;
+
+ /* major HTTP version or dot */
+ case s_major_digit:
+ {
+ if (ch == '.') {
+ state = s_first_minor_digit;
+ break;
+ }
+
+ if (ch < '0' || ch > '9') return ERROR;
+
+ parser->http_major *= 10;
+ parser->http_major += ch - '0';
+
+ if (parser->http_major > 999) return ERROR;
+ break;
+ }
+
+ /* first digit of minor HTTP version */
+ case s_first_minor_digit:
+ if (ch < '0' || ch > '9') return ERROR;
+ parser->http_minor = ch - '0';
+ state = s_minor_digit;
+ break;
+
+ /* minor HTTP version or end of request line */
+ case s_minor_digit:
+ {
+ if (ch == CR) {
+ state = s_req_line_almost_done;
+ break;
+ }
+
+ if (ch == LF) {
+ state = s_header_field_start;
+ break;
+ }
+
+ /* XXX allow spaces after digit? */
+
+ if (ch < '0' || ch > '9') return ERROR;
+
+ parser->http_minor *= 10;
+ parser->http_minor += ch - '0';
+
+ if (parser->http_minor > 999) return ERROR;
+ break;
+ }
+
+ /* end of request line */
+ case s_req_line_almost_done:
+ {
+ if (ch != LF) return ERROR;
+ state = s_header_field_start;
+ break;
+ }
+
+ case s_header_field_start:
+ {
+ if (ch == CR) {
+ state = s_headers_almost_done;
+ break;
+ }
+
+ if (ch == LF) {
+ state = s_headers_done;
+ break;
+ }
+
+ c = LOWER(ch);
+
+ if (c < 'a' || 'z' < c) return ERROR;
+
+ MARK(header_field);
+
+ header_index = 0;
+ state = s_header_field;
+
+ switch (c) {
+ case 'c':
+ header_state = h_C;
+ break;
+
+ case 't':
+ header_state = h_matching_transfer_encoding;
+ break;
+
+ default:
+ header_state = h_general;
+ break;
+ }
+ break;
+ }
+
+ case s_header_field:
+ {
+ header_index++;
+
+ c = lowcase[(int)ch];
+
+ if (c) {
+ switch (header_state) {
+ case h_general:
+ break;
+
+ case h_C:
+ header_state = (c == 'o' ? h_CO : h_general);
+ break;
+
+ case h_CO:
+ header_state = (c == 'n' ? h_CON : h_general);
+ break;
+
+ case h_CON:
+ switch (c) {
+ case 'n':
+ header_state = h_matching_connection;
+ break;
+ case 't':
+ header_state = h_matching_content_length;
+ break;
+ default:
+ header_state = h_general;
+ break;
+ }
+ break;
+
+ /* connection */
+
+ case h_matching_connection:
+ if (header_index > sizeof(CONNECTION)-1
+ || c != CONNECTION[header_index]) {
+ header_state = h_general;
+ } else if (header_index == sizeof(CONNECTION)-1) {
+ header_state = h_connection;
+ }
+ break;
+
+ /* content-length */
+
+ case h_matching_content_length:
+ if (header_index > sizeof(CONTENT_LENGTH)-1
+ || c != CONTENT_LENGTH[header_index]) {
+ header_state = h_general;
+ } else if (header_index == sizeof(CONTENT_LENGTH)-1) {
+ header_state = h_content_length;
+ }
+ break;
+
+ /* transfer-encoding */
+
+ case h_matching_transfer_encoding:
+ if (header_index > sizeof(TRANSFER_ENCODING)-1
+ || c != TRANSFER_ENCODING[header_index]) {
+ header_state = h_general;
+ } else if (header_index == sizeof(TRANSFER_ENCODING)-1) {
+ header_state = h_transfer_encoding;
+ }
+ break;
+
+ default:
+ assert(0 && "Unknown header_state");
+ break;
+ }
+ }
+
+ if (ch == ':') {
+ CALLBACK(header_field);
+ state = s_header_value_start;
+ break;
+ }
+
+ if (ch == CR) {
+ state = s_header_almost_done;
+ CALLBACK(header_field);
+ break;
+ }
+
+ if (ch == LF) {
+ CALLBACK(header_field);
+ state = s_header_field_start;
+ break;
+ }
+
+ return ERROR;
+ }
+
+ case s_header_value_start:
+ {
+ if (ch == ' ') break;
+
+ MARK(header_value);
+
+ if (ch == CR) {
+ header_state = h_general;
+ state = s_header_almost_done;
+ }
+
+ if (ch == LF) {
+ header_state = h_general;
+ state = s_headers_done;
+ }
+
+ c = lowcase[(int)ch];
+
+ if (!c) return ERROR;
+
+ switch (header_state) {
+ case h_transfer_encoding:
+ /* looking for 'Transfer-Encoding: chunked' */
+ if ('c' == c) {
+ header_state = h_encoding_C;
+ } else {
+ header_state = h_general;
+ }
+ break;
+
+ case h_content_length:
+ if (ch < '0' || ch > '9') return ERROR;
+ parser->content_length = ch - '0';
+ break;
+
+ case h_connection:
+ /* looking for 'Connection: keep-alive' */
+ if (c == 'k') {
+ header_state = h_connection_K;
+ /* looking for 'Connection: close' */
+ } else if (c == 'c') {
+ header_state = h_connection_C;
+ } else {
+ header_state = h_general;
+ }
+ break;
+
+ default:
+ state = s_header_value;
+ header_state = h_general;
+ break;
+ }
+ break;
+ }
+
+ case s_header_value:
+ {
+ break;
+ }
+
+ case s_header_almost_done:
+ if (ch != LF) return ERROR;
+ state = s_header_field_start;
+ break;
+
+ default:
+ assert(0 && "unhandled state");
+ return ERROR;
+ }
+ }
+
+ CALLBACK(header_field);
+ CALLBACK(header_value);
+ CALLBACK(fragment);
+ CALLBACK(query_string);
+ CALLBACK(path);
+ CALLBACK(uri);
+
+ parser->state = state;
+ parser->header_state = header_state;
+ parser->header_index = header_index;
+
+ return len;
+}
+
+/*
+ * Put `parser` into its initial state and clear every handler.
+ *
+ * Must be called before the first http_parser_execute(): that function
+ * reads the saved state, the header-matching state and the field marks on
+ * entry, so all of them are given defined values here. Handlers are reset
+ * to NULL; install the ones you need after this call. Only HTTP_REQUEST is
+ * supported so far.
+ */
+void
+http_parser_init (http_parser *parser, enum http_parser_type type)
+{
+  assert(type == HTTP_REQUEST && "responses not supported yet");
+
+  parser->type = type;
+  parser->state = s_start;
+  parser->header_state = h_general;
+  parser->header_index = 0;
+  parser->chunk_size = 0;
+
+  /* no field is being collected */
+  parser->header_field_mark = NULL;
+  parser->header_value_mark = NULL;
+  parser->query_string_mark = NULL;
+  parser->path_mark = NULL;
+  parser->uri_mark = NULL;
+  parser->fragment_mark = NULL;
+
+  parser->header_field_size = 0;
+  parser->header_value_size = 0;
+  parser->query_string_size = 0;
+  parser->path_size = 0;
+  parser->uri_size = 0;
+  parser->fragment_size = 0;
+
+  parser->status_code = 0;
+  parser->http_major = 0;
+  parser->http_minor = 0;
+  /* -1: not stated by the message; decided from the HTTP version */
+  parser->keep_alive = -1;
+  /* -1: no Content-Length header seen */
+  parser->content_length = -1;
+
+  parser->on_message_begin = NULL;
+  parser->on_path = NULL;
+  parser->on_query_string = NULL;
+  parser->on_uri = NULL;
+  parser->on_fragment = NULL;
+  parser->on_header_field = NULL;
+  parser->on_header_value = NULL;
+  parser->on_headers_complete = NULL;
+  parser->on_body = NULL;
+  parser->on_message_complete = NULL;
+}
+
93 http_parser.h
View
@@ -1,63 +1,3 @@
-/*
-Mongrel Web Server (Mongrel) is copyrighted free software by Zed A. Shaw
-<zedshaw at zedshaw dot com> and contributors.
-
-This source file is based on Mongrel's parser. Changes by Ryan Dahl
-<ry@tinyclouds.org> in 2008 and 2009.
-
-You can redistribute it and/or modify it under either the terms of the GPL2
-or the conditions below:
-
-1. You may make and give away verbatim copies of the source form of the
- software without restriction, provided that you duplicate all of the
- original copyright notices and associated disclaimers.
-
-2. You may modify your copy of the software in any way, provided that
- you do at least ONE of the following:
-
- a) place your modifications in the Public Domain or otherwise make them
- Freely Available, such as by posting said modifications to Usenet or an
- equivalent medium, or by allowing the author to include your
- modifications in the software.
-
- b) use the modified software only within your corporation or
- organization.
-
- c) rename any non-standard executables so the names do not conflict with
- standard executables, which must also be provided.
-
- d) make other distribution arrangements with the author.
-
-3. You may distribute the software in object code or executable
- form, provided that you do at least ONE of the following:
-
- a) distribute the executables and library files of the software,
- together with instructions (in the manual page or equivalent) on where
- to get the original distribution.
-
- b) accompany the distribution with the machine-readable source of the
- software.
-
- c) give non-standard executables non-standard names, with
- instructions on where to get the original software distribution.
-
- d) make other distribution arrangements with the author.
-
-4. You may modify and include the part of the software into any other
- software (possibly commercial). But some files in the distribution
- are not written by the author, so that they are not under this terms.
-
-5. The scripts and library files supplied as input to or produced as
- output from the software do not automatically fall under the
- copyright of the software, but belong to whomever generated them,
- and may be sold commercially, and may be aggregated with this
- software.
-
-6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
- IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- PURPOSE.
-*/
#ifndef http_parser_h
#define http_parser_h
#ifdef __cplusplus
@@ -83,34 +23,20 @@ typedef int (*http_cb) (http_parser*);
/* Request Methods */
enum http_method
- { HTTP_COPY = 0x0001
- , HTTP_DELETE = 0x0002
+ { HTTP_DELETE = 0x0002
, HTTP_GET = 0x0004
, HTTP_HEAD = 0x0008
- , HTTP_LOCK = 0x0010
- , HTTP_MKCOL = 0x0020
- , HTTP_MOVE = 0x0040
- , HTTP_OPTIONS = 0x0080
, HTTP_POST = 0x0100
- , HTTP_PROPFIND = 0x0200
- , HTTP_PROPPATCH = 0x0400
, HTTP_PUT = 0x0800
- , HTTP_TRACE = 0x1000
- , HTTP_UNLOCK = 0x2000
};
enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE };
-enum http_version
- { HTTP_VERSION_OTHER = 0x00
- , HTTP_VERSION_11 = 0x01
- , HTTP_VERSION_10 = 0x02
- , HTTP_VERSION_09 = 0x04
- };
-
struct http_parser {
/** PRIVATE **/
- int cs;
+ int state;
+ int header_state;
+ size_t header_index;
enum http_parser_type type;
size_t chunk_size;
@@ -134,7 +60,10 @@ struct http_parser {
/** READ-ONLY **/
unsigned short status_code; /* responses only */
enum http_method method; /* requests only */
- enum http_version version;
+
+ int http_major;
+ int http_minor;
+
short keep_alive;
ssize_t content_length;
@@ -163,14 +92,16 @@ struct http_parser {
*/
void http_parser_init (http_parser *parser, enum http_parser_type);
-void http_parser_execute (http_parser *parser, const char *data, size_t len);
+size_t http_parser_execute (http_parser *parser, const char *data, size_t len);
+/*
int http_parser_has_error (http_parser *parser);
+*/
static inline int
http_parser_should_keep_alive (http_parser *parser)
{
- if (parser->keep_alive == -1) return (parser->version == HTTP_VERSION_11);
+ if (parser->keep_alive == -1) return (parser->http_major == 1 && parser->http_minor == 1);
return parser->keep_alive;
}
536 http_parser.rl
View
@@ -1,536 +0,0 @@
-/*
-Mongrel Web Server (Mongrel) is copyrighted free software by Zed A. Shaw
-<zedshaw at zedshaw dot com> and contributors.
-
-This source file is based on Mongrel's parser. Changes by Ryan Dahl
-<ry@tinyclouds.org> in 2008 and 2009.
-
-You can redistribute it and/or modify it under either the terms of the GPL2
-or the conditions below:
-
-1. You may make and give away verbatim copies of the source form of the
- software without restriction, provided that you duplicate all of the
- original copyright notices and associated disclaimers.
-
-2. You may modify your copy of the software in any way, provided that
- you do at least ONE of the following:
-
- a) place your modifications in the Public Domain or otherwise make them
- Freely Available, such as by posting said modifications to Usenet or an
- equivalent medium, or by allowing the author to include your
- modifications in the software.
-
- b) use the modified software only within your corporation or
- organization.
-
- c) rename any non-standard executables so the names do not conflict with
- standard executables, which must also be provided.
-
- d) make other distribution arrangements with the author.
-
-3. You may distribute the software in object code or executable
- form, provided that you do at least ONE of the following:
-
- a) distribute the executables and library files of the software,
- together with instructions (in the manual page or equivalent) on where
- to get the original distribution.
-
- b) accompany the distribution with the machine-readable source of the
- software.
-
- c) give non-standard executables non-standard names, with
- instructions on where to get the original software distribution.
-
- d) make other distribution arrangements with the author.
-
-4. You may modify and include the part of the software into any other
- software (possibly commercial). But some files in the distribution
- are not written by the author, so that they are not under this terms.
-
-5. The scripts and library files supplied as input to or produced as
- output from the software do not automatically fall under the
- copyright of the software, but belong to whomever generated them,
- and may be sold commercially, and may be aggregated with this
- software.
-
-6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
- IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- PURPOSE.
-*/
-#include "http_parser.h"
-#include <limits.h>
-#include <assert.h>
-
-/* parser->flags */
-#define EATING 0x01
-#define ERROR 0x02
-#define CHUNKED 0x04
-#define EAT_FOREVER 0x10
-
-static int unhex[] = {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
- , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1
- ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
- ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1
- ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
- };
-
-#undef MIN
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
-
-#undef NULL
-#define NULL ((void*)(0))
-
-#define MAX_FIELD_SIZE (80*1024)
-
-#define REMAINING (unsigned long)(pe - p)
-#define CALLBACK(FOR) \
-do { \
- if (parser->FOR##_mark) { \
- parser->FOR##_size += p - parser->FOR##_mark; \
- if (parser->FOR##_size > MAX_FIELD_SIZE) { \
- parser->flags |= ERROR; \
- return; \
- } \
- if (parser->on_##FOR) { \
- callback_return_value = parser->on_##FOR(parser, \
- parser->FOR##_mark, \
- p - parser->FOR##_mark); \
- } \
- if (callback_return_value != 0) { \
- parser->flags |= ERROR; \
- return; \
- } \
- } \
-} while(0)
-
-#define RESET_PARSER(parser) \
- parser->chunk_size = 0; \
- parser->flags = 0; \
- parser->header_field_mark = NULL; \
- parser->header_value_mark = NULL; \
- parser->query_string_mark = NULL; \
- parser->path_mark = NULL; \
- parser->uri_mark = NULL; \
- parser->fragment_mark = NULL; \
- parser->status_code = 0; \
- parser->method = 0; \
- parser->version = HTTP_VERSION_OTHER; \
- parser->keep_alive = -1; \
- parser->content_length = -1; \
- parser->body_read = 0
-
-#define END_REQUEST \
-do { \
- if (parser->on_message_complete) { \
- callback_return_value = \
- parser->on_message_complete(parser); \
- } \
- RESET_PARSER(parser); \
-} while (0)
-
-#define SKIP_BODY(nskip) \
-do { \
- tmp = (nskip); \
- if (parser->on_body && tmp > 0) { \
- callback_return_value = parser->on_body(parser, p, tmp); \
- } \
- if (callback_return_value == 0) { \
- p += tmp; \
- parser->body_read += tmp; \
- parser->chunk_size -= tmp; \
- if (0 == parser->chunk_size) { \
- parser->flags &= ~EATING; \
- if (!(parser->flags & CHUNKED)) { \
- END_REQUEST; \
- } \
- } else { \
- parser->flags |= EATING; \
- } \
- } \
-} while (0)
-
-%%{
- machine http_parser;
-
- action mark_header_field {
- parser->header_field_mark = p;
- parser->header_field_size = 0;
- }
-
- action mark_header_value {
- parser->header_value_mark = p;
- parser->header_value_size = 0;
- }
-
- action mark_fragment {
- parser->fragment_mark = p;
- parser->fragment_size = 0;
- }
-
- action mark_query_string {
- parser->query_string_mark = p;
- parser->query_string_size = 0;
- }
-
- action mark_request_path {
- parser->path_mark = p;
- parser->path_size = 0;
- }
-
- action mark_request_uri {
- parser->uri_mark = p;
- parser->uri_size = 0;
- }
-
- action header_field {
- CALLBACK(header_field);
- parser->header_field_mark = NULL;
- parser->header_field_size = 0;
- }
-
- action header_value {
- CALLBACK(header_value);
- parser->header_value_mark = NULL;
- parser->header_value_size = 0;
- }
-
- action request_uri {
- CALLBACK(uri);
- parser->uri_mark = NULL;
- parser->uri_size = 0;
- }
-
- action fragment {
- CALLBACK(fragment);
- parser->fragment_mark = NULL;
- parser->fragment_size = 0;
- }
-
- action query_string {
- CALLBACK(query_string);
- parser->query_string_mark = NULL;
- parser->query_string_size = 0;
- }
-
- action request_path {
- CALLBACK(path);
- parser->path_mark = NULL;
- parser->path_size = 0;
- }
-
- action headers_complete {
- if(parser->on_headers_complete) {
- callback_return_value = parser->on_headers_complete(parser);
- if (callback_return_value != 0) {
- parser->flags |= ERROR;
- return;
- }
- }
- }
-
- action begin_message {
- if(parser->on_message_begin) {
- callback_return_value = parser->on_message_begin(parser);
- if (callback_return_value != 0) {
- parser->flags |= ERROR;
- return;
- }
- }
- }
-
- action content_length {
- if (parser->content_length == -1) parser->content_length = 0;
- if (parser->content_length > INT_MAX) {
- parser->flags |= ERROR;
- return;
- }
- parser->content_length *= 10;
- parser->content_length += *p - '0';
- }
-
- action status_code {
- parser->status_code *= 10;
- parser->status_code += *p - '0';
- }
-
- action use_chunked_encoding { parser->flags |= CHUNKED; }
-
- action set_keep_alive { parser->keep_alive = 1; }
- action set_not_keep_alive { parser->keep_alive = 0; }
-
- action version_11 { parser->version = HTTP_VERSION_11; }
- action version_10 { parser->version = HTTP_VERSION_10; }
- action version_09 { parser->version = HTTP_VERSION_09; }
-
- action add_to_chunk_size {
- parser->chunk_size *= 16;
- parser->chunk_size += unhex[(int)*p];
- }
-
- action skip_chunk_data {
- SKIP_BODY(MIN(parser->chunk_size, REMAINING));
- if (callback_return_value != 0) {
- parser->flags |= ERROR;
- return;
- }
-
- fhold;
- if (parser->chunk_size > REMAINING) {
- fbreak;
- } else {
- fgoto chunk_end;
- }
- }
-
- action end_chunked_body {
- END_REQUEST;
- if (parser->type == HTTP_REQUEST) {
- fnext Requests;
- } else {
- fnext Responses;
- }
- }
-
- action body_logic {
- if (parser->flags & CHUNKED) {
- fnext ChunkedBody;
- } else {
- /* this is pretty stupid. i'd prefer to combine this with
- * skip_chunk_data */
- if (parser->content_length < 0) {
- /* If we didn't get a content length; if not keep-alive
- * just read body until EOF */
- if (!http_parser_should_keep_alive(parser)) {
- parser->flags |= EAT_FOREVER;
- parser->chunk_size = REMAINING;
- } else {
- /* Otherwise, if keep-alive, then assume the message
- * has no body. */
- parser->chunk_size = parser->content_length = 0;
- }
- } else {
- parser->chunk_size = parser->content_length;
- }
- p += 1;
-
- SKIP_BODY(MIN(REMAINING, parser->chunk_size));
-
- if (callback_return_value != 0) {
- parser->flags |= ERROR;
- return;
- }
-
- fhold;
- if(parser->chunk_size > REMAINING) {
- fbreak;
- }
- }
- }
-
- CRLF = "\r\n";
-
-# character types
- CTL = (cntrl | 127);
- safe = ("$" | "-" | "_" | ".");
- extra = ("!" | "*" | "'" | "(" | ")" | ",");
- reserved = (";" | "/" | "?" | ":" | "@" | "&" | "=" | "+");
- unsafe = (CTL | " " | "\"" | "#" | "%" | "<" | ">");
- national = any -- (alpha | digit | reserved | extra | safe | unsafe);
- unreserved = (alpha | digit | safe | extra | national);
- escape = ("%" xdigit xdigit);
- uchar = (unreserved | escape | "\"");
- pchar = (uchar | ":" | "@" | "&" | "=" | "+");
- tspecials = ("(" | ")" | "<" | ">" | "@" | "," | ";" | ":" | "\\" | "\""
- | "/" | "[" | "]" | "?" | "=" | "{" | "}" | " " | "\t");
-
-# elements
- token = (ascii -- (CTL | tspecials));
- quote = "\"";
-# qdtext = token -- "\"";
-# quoted_pair = "\" ascii;
-# quoted_string = "\"" (qdtext | quoted_pair )* "\"";
-
-# headers
-
- Method = ( "COPY" %{ parser->method = HTTP_COPY; }
- | "DELETE" %{ parser->method = HTTP_DELETE; }
- | "GET" %{ parser->method = HTTP_GET; }
- | "HEAD" %{ parser->method = HTTP_HEAD; }
- | "LOCK" %{ parser->method = HTTP_LOCK; }
- | "MKCOL" %{ parser->method = HTTP_MKCOL; }
- | "MOVE" %{ parser->method = HTTP_MOVE; }
- | "OPTIONS" %{ parser->method = HTTP_OPTIONS; }
- | "POST" %{ parser->method = HTTP_POST; }
- | "PROPFIND" %{ parser->method = HTTP_PROPFIND; }
- | "PROPPATCH" %{ parser->method = HTTP_PROPPATCH; }
- | "PUT" %{ parser->method = HTTP_PUT; }
- | "TRACE" %{ parser->method = HTTP_TRACE; }
- | "UNLOCK" %{ parser->method = HTTP_UNLOCK; }
- ); # Not allowing extension methods
-
- HTTP_Version = "HTTP/" ( "1.1" %version_11
- | "1.0" %version_10
- | "0.9" %version_09
- | (digit "." digit)
- );
-
- scheme = ( alpha | digit | "+" | "-" | "." )* ;
- absolute_uri = (scheme ":" (uchar | reserved )*);
- path = ( pchar+ ( "/" pchar* )* ) ;
- query = ( uchar | reserved )* >mark_query_string %query_string ;
- param = ( pchar | "/" )* ;
- params = ( param ( ";" param )* ) ;
- rel_path = ( path? (";" params)? ) ;
- absolute_path = ( "/"+ rel_path ) >mark_request_path %request_path ("?" query)?;
- Request_URI = ( "*" | absolute_uri | absolute_path ) >mark_request_uri %request_uri;
- Fragment = ( uchar | reserved )* >mark_fragment %fragment;
-
- field_name = ( token -- ":" )+;
- Field_Name = field_name >mark_header_field %header_field;
-
- field_value = ((any - " ") any*)?;
- Field_Value = field_value >mark_header_value %header_value;
-
- hsep = ":" " "*;
- header = (field_name hsep field_value) :> CRLF;
- Header = ( ("Content-Length"i hsep digit+ $content_length)
- | ("Connection"i hsep
- ( "Keep-Alive"i %set_keep_alive
- | "close"i %set_not_keep_alive
- )
- )
- | ("Transfer-Encoding"i hsep "chunked"i %use_chunked_encoding)
- | (Field_Name hsep Field_Value)
- ) :> CRLF;
-
- Headers = (Header)* :> CRLF @headers_complete;
-
- Request_Line = ( Method " " Request_URI ("#" Fragment)? " " HTTP_Version CRLF ) ;
-
- StatusCode = (digit digit digit) $status_code;
- ReasonPhrase = ascii* -- ("\r" | "\n");
- StatusLine = HTTP_Version " " StatusCode (" " ReasonPhrase)? CRLF;
-
-# chunked message
- trailing_headers = header*;
- #chunk_ext_val = token | quoted_string;
- chunk_ext_val = token*;
- chunk_ext_name = token*;
- chunk_extension = ( ";" " "* chunk_ext_name ("=" chunk_ext_val)? )*;
- last_chunk = "0"+ ( chunk_extension | " "+) CRLF;
- chunk_size = (xdigit* [1-9a-fA-F] xdigit* ) $add_to_chunk_size;
- chunk_end = CRLF;
- chunk_body = any >skip_chunk_data;
- chunk_begin = chunk_size ( chunk_extension | " "+ ) CRLF;
- chunk = chunk_begin chunk_body chunk_end;
- ChunkedBody := chunk* last_chunk trailing_headers CRLF @end_chunked_body;
-
- Request = (Request_Line Headers) >begin_message @body_logic;
- Response = (StatusLine Headers) >begin_message @body_logic;
-
- Requests := Request*;
- Responses := Response*;
-
- main := any >{
- fhold;
- if (parser->type == HTTP_REQUEST) {
- fgoto Requests;
- } else {
- fgoto Responses;
- }
- };
-
-}%%
-
-%% write data;
-
-void
-http_parser_init (http_parser *parser, enum http_parser_type type)
-{
- int cs = 0;
- %% write init;
- parser->cs = cs;
- parser->type = type;
-
- parser->on_message_begin = NULL;
- parser->on_path = NULL;
- parser->on_query_string = NULL;
- parser->on_uri = NULL;
- parser->on_fragment = NULL;
- parser->on_header_field = NULL;
- parser->on_header_value = NULL;
- parser->on_headers_complete = NULL;
- parser->on_body = NULL;
- parser->on_message_complete = NULL;
-
- RESET_PARSER(parser);
-}
-
-/** exec **/
-void
-http_parser_execute (http_parser *parser, const char *buffer, size_t len)
-{
- size_t tmp; // REMOVE ME this is extremely hacky
- int callback_return_value = 0;
- const char *p, *pe, *eof;
- int cs = parser->cs;
-
- p = buffer;
- pe = buffer+len;
- eof = len ? NULL : pe;
-
- if (parser->flags & EAT_FOREVER) {
- if (len == 0) {
- if (parser->on_message_complete) {
- callback_return_value = parser->on_message_complete(parser);
- if (callback_return_value != 0) parser->flags |= ERROR;
- }
- } else {
- if (parser->on_body) {
- callback_return_value = parser->on_body(parser, p, len);
- if (callback_return_value != 0) parser->flags |= ERROR;
- }
- }
- return;
- }
-
- if (0 < parser->chunk_size && (parser->flags & EATING)) {
- /* eat body */
- SKIP_BODY(MIN(len, parser->chunk_size));
- if (callback_return_value != 0) {
- parser->flags |= ERROR;
- return;
- }
- }
-
- if (parser->header_field_mark) parser->header_field_mark = buffer;
- if (parser->header_value_mark) parser->header_value_mark = buffer;
- if (parser->fragment_mark) parser->fragment_mark = buffer;
- if (parser->query_string_mark) parser->query_string_mark = buffer;
- if (parser->path_mark) parser->path_mark = buffer;
- if (parser->uri_mark) parser->uri_mark = buffer;
-
- %% write exec;
-
- parser->cs = cs;
-
- CALLBACK(header_field);
- CALLBACK(header_value);
- CALLBACK(fragment);
- CALLBACK(query_string);
- CALLBACK(path);
- CALLBACK(uri);
-
- assert(p <= pe && "buffer overflow after parsing execute");
-}
-
-int
-http_parser_has_error (http_parser *parser)
-{
- if (parser->flags & ERROR) return 1;
- return parser->cs == http_parser_error;
-}
109 test.c
View
@@ -653,10 +653,8 @@ parse_messages (int message_count, const struct message *input_messages[])
parser_init(HTTP_REQUEST);
http_parser_execute(&parser, total, length);
- assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, NULL, 0);
- assert(!http_parser_has_error(&parser));
assert(num_messages == message_count);
@@ -665,32 +663,86 @@ parse_messages (int message_count, const struct message *input_messages[])
}
}
+/* Draw a '^' under output column `column`, then report the byte offset of
+ * the parse error. */
+static void
+print_error_marker (size_t column, size_t error_location)
+{
+  size_t j;
+  for (j = 0; j < column; j++) {
+    putchar(' ');
+  }
+  /* %zu, not %d: error_location is a size_t, and a mismatched conversion is
+   * undefined behaviour (and a -Wformat error under -Wall -Werror). */
+  printf("^\n\nerror location: %zu\n", error_location);
+}
+
+/* Print message->raw with '\r' and '\n' shown as visible escapes, one input
+ * line per output line, stopping after the line that contains the parse
+ * error and drawing a '^' under the offending column.
+ *
+ * message         test message whose raw text is being reported
+ * error_location  byte offset into message->raw at which parsing stopped
+ *
+ * Escaped characters occupy two output columns, so the caret column is
+ * tracked in output columns rather than in input bytes.
+ */
+static void
+print_error (const struct message *message, size_t error_location)
+{
+  printf("\n*** parse error on '%s' ***\n\n", message->name);
+
+  int this_line = 0, char_len = 0;
+  size_t i, len = strlen(message->raw), error_location_line = 0;
+  for (i = 0; i < len; i++) {
+    if (i == error_location) this_line = 1;
+    switch (message->raw[i]) {
+      case '\r':
+        char_len = 2;
+        printf("\\r");
+        break;
+
+      case '\n':
+        char_len = 2;
+        printf("\\n\n");
+
+        if (this_line) {
+          print_error_marker(error_location_line, error_location);
+          return;
+        }
+
+        /* A new output line starts: restart the column count. */
+        error_location_line = 0;
+        continue;
+
+      default:
+        char_len = 1;
+        putchar(message->raw[i]);
+        break;
+    }
+    /* Stop advancing once the error byte is reached so the caret lands on
+     * the first column of the offending character. */
+    if (!this_line) error_location_line += char_len;
+  }
+
+  /* The loop ended without reporting: either the failing line has no
+   * trailing '\n', or the error offset is at (or past) the end of the
+   * message. Still show the caret and the location. */
+  putchar('\n');
+  print_error_marker(error_location_line, error_location);
+}
+
void
test_message (const struct message *message)
{
parser_init(message->type);
- http_parser_execute(&parser, message->raw, strlen(message->raw));
- assert(!http_parser_has_error(&parser));
+ size_t read;
- http_parser_execute(&parser, NULL, 0);
- assert(!http_parser_has_error(&parser));
+ read = http_parser_execute(&parser, message->raw, strlen(message->raw));
+ if (read != strlen(message->raw)) {
+ print_error(message, read);
+ exit(1);
+ }
+
+ read = http_parser_execute(&parser, NULL, 0);
+ if (read != 0) {
+ print_error(message, read);
+ exit(1);
+ }
assert(num_messages == 1);
message_eq(0, message);
}
-void
+int
test_error (const char *buf)
{
parser_init(HTTP_REQUEST);
- http_parser_execute(&parser, buf, strlen(buf));
- http_parser_execute(&parser, NULL, 0);
+ size_t parsed;
+
+ parsed = http_parser_execute(&parser, buf, strlen(buf));
+ if (parsed != strlen(buf)) return 1;
+ parsed = http_parser_execute(&parser, NULL, 0);
+ if (parsed != 0) return 1;
+
+ printf("No error found in the following: %s\n", buf);
+ exit(1);
- assert(http_parser_has_error(&parser));
+ return 0;
}
void
@@ -710,10 +762,8 @@ test_multiple3 (const struct message *r1, const struct message *r2, const struct
parser_init(HTTP_REQUEST);
http_parser_execute(&parser, total, strlen(total));
- assert(!http_parser_has_error(&parser) );
http_parser_execute(&parser, NULL, 0);
- assert(!http_parser_has_error(&parser) );
assert(num_messages == 3);
message_eq(0, r1);
@@ -773,16 +823,12 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
*/
http_parser_execute(&parser, buf1, buf1_len);
- assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, buf2, buf2_len);
- assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, buf3, buf3_len);
- assert(!http_parser_has_error(&parser));
http_parser_execute(&parser, NULL, 0);
- assert(!http_parser_has_error(&parser));
assert(3 == num_messages);
@@ -797,8 +843,6 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
int
main (void)
{
- int i, j, k;
-
printf("sizeof(http_parser) = %d\n", sizeof(http_parser));
int request_count;
@@ -808,18 +852,6 @@ main (void)
for (response_count = 0; responses[response_count].name; response_count++);
- //// RESPONSES
-
- for (i = 0; i < response_count; i++) {
- test_message(&responses[i]);
- }
-
-
-
- puts("responses okay");
-
-
-
/// REQUESTS
@@ -871,15 +903,19 @@ main (void)
"HELLO";
test_error(bad_get_no_headers_no_body);
-
/* TODO sending junk and large headers gets rejected */
/* check to make sure our predefined requests are okay */
+ int i;
for (i = 0; requests[i].name; i++) {
test_message(&requests[i]);
}
+#if 0
+ int j, k;
+
+
for (i = 0; i < request_count; i++) {
for (j = 0; j < request_count; j++) {
for (k = 0; k < request_count; k++) {
@@ -910,5 +946,16 @@ main (void)
puts("requests okay");
+ //// RESPONSES
+
+ for (i = 0; i < response_count; i++) {
+ test_message(&responses[i]);
+ }
+
+
+
+ puts("responses okay");
+
+#endif
return 0;
}

6 comments on commit 433202d

Erich Ocean

Happy to see you going by hand, but the Ragel parser had the advantage of the graphical Dot state machine output. Are you maintaining a similar FSM for this version, and if so, would you please copy the .Dot file into the repository? Thanks!

ry

no, unfortunately not.

Sergey Shepelev

And the new parser will not support custom methods?

ry

no

claudiusaiz

Hello,

I noticed that lines 771-781 in http_parser.c allow an HTTP header to contain only the field name, without the ':'.
For example, we could have a header like this:

"Accept-Encoding\r\n"

But, section 4.2 of HTTP RFC 2616 specifies that an HTTP header is defined like this:

message-header = field-name ":" [ field-value ]

, which makes the ':' character compulsory.

I am having some problems with this, and I was thinking of commenting out lines 771-781. Does anyone happen to know if this would have negative effects on the rest of the parser's functionality?

Thanks,
Claudiu

Ben Noordhuis
Collaborator

I wager you're looking at an old version of http-parser. With the current master (8081938) the logic you describe is around line 1300. But yeah, it will accept headers without values for the sake of interoperability (think: buggy servers.)

Please sign in to comment.
Something went wrong with that request. Please try again.