RFC 9112: Improve HTTP chunked encoding compliance (#1498)
yadij committed Oct 14, 2023
1 parent 7de0196 commit 6cfa10d
Showing 5 changed files with 39 additions and 15 deletions.
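For context, RFC 9112 frames a chunked message body as a series of chunks, each introduced by a bare hexadecimal chunk-size (1*HEXDIG, so no "0x" prefix) and terminated by CRLF, ending with a zero-size last-chunk. The snippet below is an editorial illustration rather than part of the commit; it shows a body that remains acceptable under the stricter parsing, whereas a size spelled "0x1a" or a chunk line ended by a bare LF is now rejected.

// Editorial illustration (not part of the commit): a well-formed RFC 9112
// chunked body. The chunk-size is bare hexadecimal and every size line and
// chunk-data block is terminated by CRLF.
#include <string>

const std::string chunkedBody =
    "1a\r\n"                          // chunk-size = 1*HEXDIG; "0x1a" would now be rejected
    "abcdefghijklmnopqrstuvwxyz\r\n"  // 0x1a (26) bytes of chunk data, then CRLF
    "0\r\n"                           // last-chunk
    "\r\n";                           // end of the (empty) trailer section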
8 changes: 1 addition & 7 deletions src/http/one/Parser.cc
@@ -65,16 +65,10 @@ Http::One::Parser::DelimiterCharacters()
 void
 Http::One::Parser::skipLineTerminator(Tokenizer &tok) const
 {
-    if (tok.skip(Http1::CrLf()))
-        return;
-
     if (Config.onoff.relaxed_header_parser && tok.skipOne(CharacterSet::LF))
         return;

-    if (tok.atEnd() || (tok.remaining().length() == 1 && tok.remaining().at(0) == '\r'))
-        throw InsufficientInput();
-
-    throw TexcHere("garbage instead of CRLF line terminator");
+    tok.skipRequired("line-terminating CRLF", Http1::CrLf());
 }

 /// all characters except the LF line terminator
4 changes: 1 addition & 3 deletions src/http/one/Parser.h
@@ -124,9 +124,7 @@ class Parser : public RefCountable
      * detect and skip the CRLF or (if tolerant) LF line terminator
      * consume from the tokenizer.
      *
-     * \throws exception on bad or InsuffientInput.
-     * \retval true only if line terminator found.
-     * \retval false incomplete or missing line terminator, need more data.
+     * \throws exception on bad or InsufficientInput
      */
     void skipLineTerminator(Tokenizer &) const;

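The contract documented above has three outcomes: the terminator is skipped (CRLF always, bare LF only under relaxed_header_parser), more data is needed, or the input is malformed. The standalone sketch below models that contract with std::string_view instead of Squid's Tokenizer; it is illustrative only and not the project's code.

#include <stdexcept>
#include <string_view>

enum class LineEnd { Skipped, NeedMoreData };

// Models skipLineTerminator(): strict CRLF, optionally a tolerated bare LF,
// "need more data" while the terminator may still arrive, error otherwise.
LineEnd skipLineTerminatorModel(std::string_view &input, bool relaxedParser)
{
    if (input.substr(0, 2) == "\r\n") {
        input.remove_prefix(2);                 // the required CRLF
        return LineEnd::Skipped;
    }
    if (relaxedParser && !input.empty() && input.front() == '\n') {
        input.remove_prefix(1);                 // tolerated bare LF
        return LineEnd::Skipped;
    }
    if (input.empty() || input == "\r")         // CRLF may complete on the next read
        return LineEnd::NeedMoreData;
    throw std::runtime_error("garbage instead of CRLF line terminator");
}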
23 changes: 18 additions & 5 deletions src/http/one/TeChunkedParser.cc
@@ -91,6 +91,11 @@ Http::One::TeChunkedParser::parseChunkSize(Tokenizer &tok)
 {
     Must(theChunkSize <= 0); // Should(), really

+    static const SBuf bannedHexPrefixLower("0x");
+    static const SBuf bannedHexPrefixUpper("0X");
+    if (tok.skip(bannedHexPrefixLower) || tok.skip(bannedHexPrefixUpper))
+        throw TextException("chunk starts with 0x", Here());
+
     int64_t size = -1;
     if (tok.int64(size, 16, false) && !tok.atEnd()) {
         if (size < 0)
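The explicit ban above matters because the chunk-size grammar is 1*HEXDIG, while C-style base-16 conversion routines accept an optional "0x"/"0X" prefix. The demo below is an editorial aside using strtoll rather than Squid's Tokenizer::int64, but it shows the kind of prefix acceptance a compliant parser has to rule out before converting the digits.

// Editorial demo (not Squid code): strtoll-style base-16 parsing accepts a
// "0x" prefix that RFC 9112's chunk-size = 1*HEXDIG grammar does not allow.
#include <cstdio>
#include <cstdlib>

int main()
{
    const char *sizeField = "0x1a";   // hypothetical received chunk-size field
    char *end = nullptr;
    const long long size = std::strtoll(sizeField, &end, 16);
    // Prints "26 4": the prefix is silently consumed along with the digits.
    std::printf("%lld %td\n", size, end - sizeField);
    return 0;
}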
@@ -121,7 +126,7 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
     // bad or insufficient input, like in the code below. TODO: Expand up.
     try {
         parseChunkExtensions(tok); // a possibly empty chunk-ext list
-        skipLineTerminator(tok);
+        tok.skipRequired("CRLF after [chunk-ext]", Http1::CrLf());
         buf_ = tok.remaining();
         parsingStage_ = theChunkSize ? Http1::HTTP_PARSE_CHUNK : Http1::HTTP_PARSE_MIME;
         return true;
Expand All @@ -132,19 +137,22 @@ Http::One::TeChunkedParser::parseChunkMetadataSuffix(Tokenizer &tok)
     // other exceptions bubble up to kill message parsing
 }

-/// Parses the chunk-ext list (RFC 7230 section 4.1.1 and its Errata #4667):
+/// Parses the chunk-ext list (RFC 9112 section 7.1.1):
 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
 void
-Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &tok)
+Http::One::TeChunkedParser::parseChunkExtensions(Tokenizer &callerTok)
 {
     do {
+        auto tok = callerTok;
+
         ParseBws(tok); // Bug 4492: IBM_HTTP_Server sends SP after chunk-size

         if (!tok.skip(';'))
             return; // reached the end of extensions (if any)

         parseOneChunkExtension(tok);
         buf_ = tok.remaining(); // got one extension
+        callerTok = tok;
     } while (true);
 }

@@ -158,11 +166,14 @@ Http::One::ChunkExtensionValueParser::Ignore(Tokenizer &tok, const SBuf &extName
 /// Parses a single chunk-ext list element:
 /// chunk-ext = *( BWS ";" BWS chunk-ext-name [ BWS "=" BWS chunk-ext-val ] )
 void
-Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
+Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &callerTok)
 {
+    auto tok = callerTok;
+
     ParseBws(tok); // Bug 4492: ICAP servers send SP before chunk-ext-name

     const auto extName = tok.prefix("chunk-ext-name", CharacterSet::TCHAR);
+    callerTok = tok; // in case we determine that this is a valueless chunk-ext

     ParseBws(tok);

Expand All @@ -176,6 +187,8 @@ Http::One::TeChunkedParser::parseOneChunkExtension(Tokenizer &tok)
         customExtensionValueParser->parse(tok, extName);
     else
         ChunkExtensionValueParser::Ignore(tok, extName);
+
+    callerTok = tok;
 }

 bool
@@ -209,7 +222,7 @@ Http::One::TeChunkedParser::parseChunkEnd(Tokenizer &tok)
     Must(theLeftBodySize == 0); // Should(), really

     try {
-        skipLineTerminator(tok);
+        tok.skipRequired("chunk CRLF", Http1::CrLf());
         buf_ = tok.remaining(); // parse checkpoint
         theChunkSize = 0; // done with the current chunk
         parsingStage_ = Http1::HTTP_PARSE_CHUNK_SZ;
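The parseChunkExtensions() and parseOneChunkExtension() changes above introduce a work-on-a-copy pattern: each step parses from a local copy of the caller's tokenizer and assigns it back only after a complete element has been consumed, so an InsufficientInput thrown mid-element leaves the caller at the last committed position. A minimal standalone sketch of that pattern, using std::string_view in place of Squid's Tokenizer, might look like this (illustrative only):

#include <stdexcept>
#include <string_view>

struct InsufficientInput {};              // stand-in for Squid's Parser::InsufficientInput

// Parses one ";name" element from a copy of the caller's view; the consumed
// bytes are committed back to callerView only when a whole element was seen.
bool parseOneExtension(std::string_view &callerView)
{
    auto view = callerView;               // work on a private copy
    if (view.empty() || view.front() != ';')
        return false;                     // no (more) extensions to consume
    view.remove_prefix(1);
    constexpr std::string_view nameChars =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-";
    const auto nameLen = view.find_first_not_of(nameChars);
    if (nameLen == std::string_view::npos)
        throw InsufficientInput();        // the name may continue past the buffered data
    if (nameLen == 0)
        throw std::runtime_error("missing chunk-ext-name");
    view.remove_prefix(nameLen);
    callerView = view;                    // commit only after a complete element parsed
    return true;
}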
12 changes: 12 additions & 0 deletions src/parser/Tokenizer.cc
@@ -147,6 +147,18 @@ Parser::Tokenizer::skipAll(const CharacterSet &tokenChars)
     return success(prefixLen);
 }

+void
+Parser::Tokenizer::skipRequired(const char *description, const SBuf &tokenToSkip)
+{
+    if (skip(tokenToSkip) || tokenToSkip.isEmpty())
+        return;
+
+    if (tokenToSkip.startsWith(buf_))
+        throw InsufficientInput();
+
+    throw TextException(ToSBuf("cannot skip ", description), Here());
+}
+
 bool
 Parser::Tokenizer::skipOne(const CharacterSet &chars)
 {
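The new Tokenizer::skipRequired() above distinguishes three cases: the expected token is present and consumed, the buffered data is still a prefix of the token (so more input may complete it, hence InsufficientInput), or the data cannot match (a hard parse error). A self-contained model of that decision, with the three cases exercised, might look like this (illustrative only, not Squid's types):

#include <cassert>
#include <stdexcept>
#include <string>
#include <string_view>

struct InsufficientInput {};              // stand-in for Parser::InsufficientInput

// Mirrors the decision logic of skipRequired() above, on a plain byte buffer.
void skipRequiredModel(std::string_view &buf, std::string_view tokenToSkip, const char *description)
{
    if (buf.substr(0, tokenToSkip.size()) == tokenToSkip) {
        buf.remove_prefix(tokenToSkip.size());    // token found (or empty): consume it
        return;
    }
    if (tokenToSkip.substr(0, buf.size()) == buf)
        throw InsufficientInput();                // buffer is a proper prefix; wait for more data
    throw std::runtime_error(std::string("cannot skip ") + description);
}

int main()
{
    std::string_view a = "\r\nnext chunk";
    skipRequiredModel(a, "\r\n", "chunk CRLF");   // consumed
    assert(a == "next chunk");

    std::string_view b = "\r";                    // terminator split across network reads
    try { skipRequiredModel(b, "\r\n", "chunk CRLF"); } catch (const InsufficientInput &) {}

    std::string_view c = "garbage";
    try { skipRequiredModel(c, "\r\n", "chunk CRLF"); } catch (const std::runtime_error &) {}
    return 0;
}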
7 changes: 7 additions & 0 deletions src/parser/Tokenizer.h
@@ -115,6 +115,13 @@ class Tokenizer
      */
     SBuf::size_type skipAll(const CharacterSet &discardables);

+    /** skips a given character sequence (string);
+     * does nothing if the sequence is empty
+     *
+     * \throws exception on mismatching prefix or InsufficientInput
+     */
+    void skipRequired(const char *description, const SBuf &tokenToSkip);
+
     /** Removes a single trailing character from the set.
      *
      * \return whether a character was removed
