Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

IS_* macros for char classes.

- Add IS_ALPHA(), IS_NUM(), IS_HOST_CHAR(), etc. macros for determining
  membership in a character class. HTTP_PARSER_STRICT causes some of
  these definitions to change.
- Support '_' character in hostnames in non-strict mode.
- Support leading digits in hostnames when the method is HTTP_CONNECT.
- Don't re-define HTTP_PARSER_STRICT in http_parser.h if it's already
  defined.
- Tweak Makefile to run non-strict-mode unit tests. Rearrange non-strict
  mode unit tests in test.c.
- Add test_fast to .gitignore.

Fixes #44
  • Loading branch information...
commit 3bd18a779e880d996fda3cf4c35ef4dc4f6a24e1 1 parent fb23d15
@pgriess pgriess authored ry committed
Showing with 102 additions and 91 deletions.
  1. +1 −0  .gitignore
  2. +5 −4 Makefile
  3. +44 −59 http_parser.c
  4. +52 −28 test.c
View
1  .gitignore
@@ -2,3 +2,4 @@ tags
*.o
test
test_g
+test_fast
View
9 Makefile
@@ -1,13 +1,14 @@
-CPPFLAGS?=-DHTTP_PARSER_STRICT=1
-OPT_DEBUG=-O0 -g -Wall -Wextra -Werror -I. $(CPPFLAGS)
-OPT_FAST=-O3 -DHTTP_PARSER_STRICT=0 -I. $(CPPFLAGS)
+CPPFLAGS?=-Wall -Wextra -Werror -I.
+OPT_DEBUG=$(CPPFLAGS) -O0 -g -DHTTP_PARSER_STRICT=1
+OPT_FAST=$(CPPFLAGS) -O3 -DHTTP_PARSER_STRICT=0
CC?=gcc
AR?=ar
-test: test_g
+test: test_g test_fast
./test_g
+ ./test_fast
test_g: http_parser_g.o test_g.o
$(CC) $(OPT_DEBUG) http_parser_g.o test_g.o -o $@
View
103 http_parser.c
@@ -189,33 +189,7 @@ static const uint8_t normal_url_char[256] = {
/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */
1, 1, 1, 1, 1, 1, 1, 1,
/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */
- 1, 1, 1, 1, 1, 1, 1, 0,
-
-#if HTTP_PARSER_STRICT
- 0
-#else
-/* Remainder of non-ASCII range are accepted as-is to support implicitly UTF-8
- encoded paths. This is out of spec, but clients generate this and most other
- HTTP servers support it. We should, too. */
-
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1
-#endif
-}; /* normal_url_char */
+ 1, 1, 1, 1, 1, 1, 1, 0, };
enum state
@@ -319,10 +293,24 @@ enum header_states
};
-#define CR '\r'
-#define LF '\n'
-#define LOWER(c) (unsigned char)(c | 0x20)
-#define TOKEN(c) tokens[(unsigned char)c]
+/* Character-class macros. IS_URL_CHAR and IS_HOST_CHAR are relaxed when
+ * HTTP_PARSER_STRICT is 0. Arguments may be evaluated more than once, so
+ * pass only side-effect-free expressions. */
+#define CR '\r'
+#define LF '\n'
+/* ASCII case fold: sets bit 0x20, which maps 'A'-'Z' onto 'a'-'z'. */
+#define LOWER(c) ((unsigned char)((c) | 0x20))
+#define TOKEN(c) (tokens[(unsigned char)(c)])
+/* Case-insensitive: folds the argument itself, so call sites may pass the
+ * raw input byte. '@', '[', '`' and '{' still fall outside the range. */
+#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
+#define IS_NUM(c) ((c) >= '0' && (c) <= '9')
+#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
+
+#if HTTP_PARSER_STRICT
+#define IS_URL_CHAR(c) (normal_url_char[(unsigned char) (c)])
+#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-')
+#else
+/* Non-strict: additionally accept any byte with the high bit set (e.g.
+ * implicitly UTF-8 encoded paths) and '_' in hostnames. */
+#define IS_URL_CHAR(c) \
+ (normal_url_char[(unsigned char) (c)] || ((c) & 0x80))
+#define IS_HOST_CHAR(c) \
+ (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
+#endif
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
@@ -499,7 +487,7 @@ size_t http_parser_execute (http_parser *parser,
break;
}
- if (ch < '0' || ch > '9') goto error;
+ if (!IS_NUM(ch)) goto error;
parser->http_major *= 10;
parser->http_major += ch - '0';
@@ -510,7 +498,7 @@ size_t http_parser_execute (http_parser *parser,
/* first digit of minor HTTP version */
case s_res_first_http_minor:
- if (ch < '0' || ch > '9') goto error;
+ if (!IS_NUM(ch)) goto error;
parser->http_minor = ch - '0';
state = s_res_http_minor;
break;
@@ -523,7 +511,7 @@ size_t http_parser_execute (http_parser *parser,
break;
}
- if (ch < '0' || ch > '9') goto error;
+ if (!IS_NUM(ch)) goto error;
parser->http_minor *= 10;
parser->http_minor += ch - '0';
@@ -534,7 +522,7 @@ size_t http_parser_execute (http_parser *parser,
case s_res_first_status_code:
{
- if (ch < '0' || ch > '9') {
+ if (!IS_NUM(ch)) {
if (ch == ' ') {
break;
}
@@ -547,7 +535,7 @@ size_t http_parser_execute (http_parser *parser,
case s_res_status_code:
{
- if (ch < '0' || ch > '9') {
+ if (!IS_NUM(ch)) {
switch (ch) {
case ' ':
state = s_res_status;
@@ -599,7 +587,7 @@ size_t http_parser_execute (http_parser *parser,
CALLBACK2(message_begin);
- if (ch < 'A' || 'Z' < ch) goto error;
+ if (!IS_ALPHA(LOWER(ch))) goto error;
start_req_method_assign:
parser->method = (enum http_method) 0;
@@ -678,9 +666,13 @@ size_t http_parser_execute (http_parser *parser,
c = LOWER(ch);
- if (c >= 'a' && c <= 'z') {
+ /* A proxied request is followed by the scheme of an absolute URI, which
+ * begins with a letter. CONNECT is followed by a hostname, which begins
+ * with a letter or a digit. All other methods are followed by '/' or '*'
+ * (handled above).
+ */
+ if (IS_ALPHA(ch) || (parser->method == HTTP_CONNECT && IS_NUM(ch))) {
MARK(url);
- state = s_req_schema;
+ state = (parser->method == HTTP_CONNECT) ? s_req_host : s_req_schema;
break;
}
@@ -691,17 +683,11 @@ size_t http_parser_execute (http_parser *parser,
{
c = LOWER(ch);
- if (c >= 'a' && c <= 'z') break;
+ if (IS_ALPHA(c)) break;
if (ch == ':') {
state = s_req_schema_slash;
break;
- } else if (ch == '.') {
- state = s_req_host;
- break;
- } else if ('0' <= ch && ch <= '9') {
- state = s_req_host;
- break;
}
goto error;
@@ -720,8 +706,7 @@ size_t http_parser_execute (http_parser *parser,
case s_req_host:
{
c = LOWER(ch);
- if (c >= 'a' && c <= 'z') break;
- if ((ch >= '0' && ch <= '9') || ch == '.' || ch == '-') break;
+ if (IS_HOST_CHAR(ch)) break;
switch (ch) {
case ':':
state = s_req_port;
@@ -749,7 +734,7 @@ size_t http_parser_execute (http_parser *parser,
case s_req_port:
{
- if (ch >= '0' && ch <= '9') break;
+ if (IS_NUM(ch)) break;
switch (ch) {
case '/':
MARK(path);
@@ -774,7 +759,7 @@ size_t http_parser_execute (http_parser *parser,
case s_req_path:
{
- if (normal_url_char[(unsigned char)ch]) break;
+ if (IS_URL_CHAR(ch)) break;
switch (ch) {
case ' ':
@@ -812,7 +797,7 @@ size_t http_parser_execute (http_parser *parser,
case s_req_query_string_start:
{
- if (normal_url_char[(unsigned char)ch]) {
+ if (IS_URL_CHAR(ch)) {
MARK(query_string);
state = s_req_query_string;
break;
@@ -848,7 +833,7 @@ size_t http_parser_execute (http_parser *parser,
case s_req_query_string:
{
- if (normal_url_char[(unsigned char)ch]) break;
+ if (IS_URL_CHAR(ch)) break;
switch (ch) {
case '?':
@@ -885,7 +870,7 @@ size_t http_parser_execute (http_parser *parser,
case s_req_fragment_start:
{
- if (normal_url_char[(unsigned char)ch]) {
+ if (IS_URL_CHAR(ch)) {
MARK(fragment);
state = s_req_fragment;
break;
@@ -922,7 +907,7 @@ size_t http_parser_execute (http_parser *parser,
case s_req_fragment:
{
- if (normal_url_char[(unsigned char)ch]) break;
+ if (IS_URL_CHAR(ch)) break;
switch (ch) {
case ' ':
@@ -1000,7 +985,7 @@ size_t http_parser_execute (http_parser *parser,
break;
}
- if (ch < '0' || ch > '9') goto error;
+ if (!IS_NUM(ch)) goto error;
parser->http_major *= 10;
parser->http_major += ch - '0';
@@ -1011,7 +996,7 @@ size_t http_parser_execute (http_parser *parser,
/* first digit of minor HTTP version */
case s_req_first_http_minor:
- if (ch < '0' || ch > '9') goto error;
+ if (!IS_NUM(ch)) goto error;
parser->http_minor = ch - '0';
state = s_req_http_minor;
break;
@@ -1031,7 +1016,7 @@ size_t http_parser_execute (http_parser *parser,
/* XXX allow spaces after digit? */
- if (ch < '0' || ch > '9') goto error;
+ if (!IS_NUM(ch)) goto error;
parser->http_minor *= 10;
parser->http_minor += ch - '0';
@@ -1264,7 +1249,7 @@ size_t http_parser_execute (http_parser *parser,
break;
case h_content_length:
- if (ch < '0' || ch > '9') goto error;
+ if (!IS_NUM(ch)) goto error;
parser->content_length = ch - '0';
break;
@@ -1313,7 +1298,7 @@ size_t http_parser_execute (http_parser *parser,
case h_content_length:
if (ch == ' ') break;
- if (ch < '0' || ch > '9') goto error;
+ if (!IS_NUM(ch)) goto error;
parser->content_length *= 10;
parser->content_length += ch - '0';
break;
View
80 test.c
@@ -498,7 +498,7 @@ const struct message requests[] =
#define CONNECT_REQUEST 17
, {.name = "connect request"
,.type= HTTP_REQUEST
- ,.raw= "CONNECT home0.netscape.com:443 HTTP/1.0\r\n"
+ ,.raw= "CONNECT 0-home0.netscape.com:443 HTTP/1.0\r\n"
"User-agent: Mozilla/1.1N\r\n"
"Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n"
"\r\n"
@@ -510,7 +510,7 @@ const struct message requests[] =
,.query_string= ""
,.fragment= ""
,.request_path= ""
- ,.request_url= "home0.netscape.com:443"
+ ,.request_url= "0-home0.netscape.com:443"
,.num_headers= 2
,.upgrade=1
,.headers= { { "User-agent", "Mozilla/1.1N" }
@@ -582,30 +582,7 @@ const struct message requests[] =
,.body= ""
}
-#if !HTTP_PARSER_STRICT
-#define UTF8_PATH_REQ 21
-, {.name= "utf-8 path request"
- ,.type= HTTP_REQUEST
- ,.raw= "GET /δ¶/δt/pope?q=1#narf HTTP/1.1\r\n"
- "Host: github.com\r\n"
- "\r\n"
- ,.should_keep_alive= TRUE
- ,.message_complete_on_eof= FALSE
- ,.http_major= 1
- ,.http_minor= 1
- ,.method= HTTP_GET
- ,.query_string= "q=1"
- ,.fragment= "narf"
- ,.request_path= "/δ¶/δt/pope"
- ,.request_url= "/δ¶/δt/pope?q=1#narf"
- ,.num_headers= 1
- ,.headers= { {"Host", "github.com" }
- }
- ,.body= ""
- }
-#endif /* !HTTP_PARSER_STRICT */
-
-#define QUERY_TERMINATED_HOST 22
+#define QUERY_TERMINATED_HOST 21
, {.name= "host terminated by a query string"
,.type= HTTP_REQUEST
,.raw= "GET http://hypnotoad.org?hail=all HTTP/1.1\r\n"
@@ -624,7 +601,7 @@ const struct message requests[] =
,.body= ""
}
-#define QUERY_TERMINATED_HOSTPORT 23
+#define QUERY_TERMINATED_HOSTPORT 22
, {.name= "host:port terminated by a query string"
,.type= HTTP_REQUEST
,.raw= "GET http://hypnotoad.org:1234?hail=all HTTP/1.1\r\n"
@@ -643,7 +620,7 @@ const struct message requests[] =
,.body= ""
}
-#define SPACE_TERMINATED_HOSTPORT 24
+#define SPACE_TERMINATED_HOSTPORT 23
, {.name= "host:port terminated by a space"
,.type= HTTP_REQUEST
,.raw= "GET http://hypnotoad.org:1234 HTTP/1.1\r\n"
@@ -662,6 +639,53 @@ const struct message requests[] =
,.body= ""
}
+#if !HTTP_PARSER_STRICT
+#define UTF8_PATH_REQ 24
+, {.name= "utf-8 path request"
+ ,.type= HTTP_REQUEST
+ ,.raw= "GET /δ¶/δt/pope?q=1#narf HTTP/1.1\r\n"
+ "Host: github.com\r\n"
+ "\r\n"
+ ,.should_keep_alive= TRUE
+ ,.message_complete_on_eof= FALSE
+ ,.http_major= 1
+ ,.http_minor= 1
+ ,.method= HTTP_GET
+ ,.query_string= "q=1"
+ ,.fragment= "narf"
+ ,.request_path= "/δ¶/δt/pope"
+ ,.request_url= "/δ¶/δt/pope?q=1#narf"
+ ,.num_headers= 1
+ ,.headers= { {"Host", "github.com" }
+ }
+ ,.body= ""
+ }
+
+/* Index of the "hostname underscore" entry in requests[]; it follows
+ * UTF8_PATH_REQ (24) and exists only in non-strict builds. */
+#define HOSTNAME_UNDERSCORE 25
+, {.name = "hostname underscore"
+ ,.type= HTTP_REQUEST
+ ,.raw= "CONNECT home_0.netscape.com:443 HTTP/1.0\r\n"
+ "User-agent: Mozilla/1.1N\r\n"
+ "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n"
+ "\r\n"
+ ,.should_keep_alive= FALSE
+ ,.message_complete_on_eof= FALSE
+ ,.http_major= 1
+ ,.http_minor= 0
+ ,.method= HTTP_CONNECT
+ ,.query_string= ""
+ ,.fragment= ""
+ ,.request_path= ""
+ ,.request_url= "home_0.netscape.com:443"
+ ,.num_headers= 2
+ ,.upgrade=1
+ ,.headers= { { "User-agent", "Mozilla/1.1N" }
+ , { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" }
+ }
+ ,.body= ""
+ }
+#endif /* !HTTP_PARSER_STRICT */
+
, {.name= NULL } /* sentinel */
};
Please sign in to comment.
Something went wrong with that request. Please try again.