Skip to content

Commit

Permalink
This patch allows uploading binary multipart/form-data streams to pion.
Browse files Browse the repository at this point in the history
Such streams will be stored as BASE64 encoded data.
To restore the original binary stream, the user may call the provided function
parser::base64_2binary
  • Loading branch information
Serge committed Jan 2, 2016
1 parent d5df4be commit f9075e7
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 25 deletions.
40 changes: 31 additions & 9 deletions include/pion/http/parser.hpp
Expand Up @@ -328,17 +328,39 @@ class PION_API parser :
const char *ptr, const std::size_t len);

/**
* parse key-value pairs out of a multipart/form-data payload content
* (http://www.ietf.org/rfc/rfc2388.txt)
*
* @param dict dictionary for key-values pairs
* @param content_type value of the content-type HTTP header
* @param ptr points to the start of the encoded data
* @param len length of the encoded data, in bytes
*
* convert a binary buffer to a base64-encoded string, prefixed with the original stream's MIME type
* @param out_val string for MIME encoded data
* @param buf points to the start of the binary data
* @param buf_size size of the binary data
* @param stream_type original stream MIME type
*
* @return bool true if successful
*/
static bool parse_multipart_form_data(ihash_multimap& dict,
static bool binary_2base64(std::string& out_val, const char *buf, const std::size_t buf_size, const std::string& stream_type);
/**
* convert a base64-encoded string, prefixed with the original stream's MIME type, back to a binary buffer
* @param out_buf buffer that receives the decoded binary data
* @param buf_size size of out_buf, in bytes
* @param out_size required buffer size (when false is returned) or number of bytes of out_buf used for decoded data
* @param out_stream_type original stream MIME type
* @param base64 encoded data
*
* @return bool true if successful
*/
static bool base64_2binary(char *out_buf, const std::size_t buf_size, std::size_t& out_size, std::string& out_stream_type, const std::string& base64);

/**
* parse key-value pairs out of a multipart/form-data payload content
* (http://www.ietf.org/rfc/rfc2388.txt)
*
* @param dict dictionary for key-values pairs
* @param content_type value of the content-type HTTP header
* @param ptr points to the start of the encoded data
* @param len length of the encoded data, in bytes
*
* @return bool true if successful
*/
static bool parse_multipart_form_data(ihash_multimap& dict,
const std::string& content_type,
const char *ptr, const std::size_t len);

Expand Down
116 changes: 105 additions & 11 deletions src/http_parser.cpp
Expand Up @@ -13,6 +13,15 @@
#include <boost/assert.hpp>
#include <boost/logic/tribool.hpp>
#include <boost/algorithm/string.hpp>

#include <boost/archive/iterators/base64_from_binary.hpp>
#include <boost/archive/iterators/binary_from_base64.hpp>
#include <boost/archive/iterators/insert_linebreaks.hpp>
#include <boost/archive/iterators/transform_width.hpp>
#include <boost/archive/iterators/ostream_iterator.hpp>
#include <sstream>
#include <string>

#include <pion/algorithm.hpp>
#include <pion/http/parser.hpp>
#include <pion/http/request.hpp>
Expand Down Expand Up @@ -976,6 +985,81 @@ bool parser::parse_url_encoded(ihash_multimap& dict,
return true;
}

bool parser::binary_2base64(std::string& out_val, const char *buf, const std::size_t buf_size, const std::string& stream_type)
{
static const std::string padding[] = { "", "==", "=" };

if (buf == NULL)
return false;

using namespace boost::archive::iterators;
typedef
base64_from_binary< // convert binary values to base64 characters
transform_width< // retrieve 6 bit integers from a sequence of 8 bit bytes
const char *, 6, 8
>
>
binary_2base64; // compose all the above operations in to a new iterator

std::stringstream os;
std::copy(
binary_2base64(buf),
binary_2base64(buf + buf_size),
ostream_iterator<char>(os)
);
os << padding[buf_size % 3];

out_val.assign("data:");
out_val.append(stream_type);
out_val.append("; base64, ");
out_val.append(os.str());
return true;
}

bool parser::base64_2binary(char *out_buf, const std::size_t buf_size, std::size_t& out_size, std::string& out_stream_type, const std::string& base64)
{
using namespace boost::archive::iterators;
typedef
transform_width< // retrieve 8 bit integers from a sequence of 6 bit bytes
binary_from_base64<const char *>, // convert binary values to base64 characters
8,
6
>
base64_2binary; // compose all the above operations in to a new iterator

std::size_t size = base64.size();

out_size = 0;

if (false == boost::algorithm::equals(base64.substr(0, 5), "data:"))
return false;
std::size_t pos = base64.find("; base64, ");
if (pos == std::string::npos)
return false;
out_stream_type.assign(base64.substr(5, pos-5));
const std::size_t prefix_end_pos = pos + 10;

if (size && base64[size - 1] == '=') {
--size;
if (size && base64[size - 1] == '=')
--size;
}

out_size = size;
if (size == 0)
return true;
if (size > buf_size || out_buf == NULL)
return false;

std::copy(
base64_2binary(base64.data() + prefix_end_pos),
base64_2binary(base64.data() + size - prefix_end_pos),
out_buf
);

return true;
}

bool parser::parse_multipart_form_data(ihash_multimap& dict,
const std::string& content_type,
const char *ptr, const size_t len)
Expand All @@ -1002,9 +1086,10 @@ bool parser::parse_multipart_form_data(ihash_multimap& dict,
std::string header_name;
std::string header_value;
std::string field_name;
std::string field_value;
bool found_parameter = false;
bool save_current_field = true;
std::string field_value;
std::string content_type_header;
bool found_parameter = false;
bool do_mime64_convertion = true;
const char * const end_ptr = ptr + len;

ptr = std::search(ptr, end_ptr, boundary.begin(), boundary.end());
Expand All @@ -1017,7 +1102,8 @@ bool parser::parse_multipart_form_data(ihash_multimap& dict,
header_value.clear();
field_name.clear();
field_value.clear();
save_current_field = true;
content_type_header.clear();
do_mime64_convertion = true;
ptr += boundary.size() - 1;
parse_state = MP_PARSE_HEADER_CR;
break;
Expand Down Expand Up @@ -1079,8 +1165,9 @@ bool parser::parse_multipart_form_data(ihash_multimap& dict,
if (*ptr == '\r' || *ptr == '\n') {
// reached the end of the value -> check if it's important
if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_TYPE)) {
// only keep fields that have a text type or no type
save_current_field = boost::algorithm::iequals(header_value.substr(0, 5), "text/");
content_type_header.assign(header_value);
// do not encode fields that have a text type or no type
do_mime64_convertion = false == boost::algorithm::iequals(header_value.substr(0, 5), "text/");
} else if (boost::algorithm::iequals(header_name, types::HEADER_CONTENT_DISPOSITION)) {
// get current field from content-disposition header
std::size_t name_pos = header_value.find("name=\"");
Expand All @@ -1103,7 +1190,7 @@ bool parser::parse_multipart_form_data(ihash_multimap& dict,
// expecting final linefeed to terminate headers and begin field data
if (*ptr == '\n') {
// got it
if (save_current_field && !field_name.empty()) {
if (!field_name.empty()) {
// parse the field if we care & know enough about it
parse_state = MP_PARSE_FIELD_DATA;
} else {
Expand All @@ -1124,10 +1211,17 @@ bool parser::parse_multipart_form_data(ihash_multimap& dict,
field_end_ptr = temp_ptr;
else field_end_ptr = next_ptr;
}
field_value.assign(ptr, field_end_ptr - ptr);
// add the field to the query dictionary
dict.insert( std::make_pair(field_name, field_value) );
found_parameter = true;
do {
if (do_mime64_convertion) {
std::string stream_type;
if (false == binary_2base64(field_value, ptr, field_end_ptr - ptr, content_type_header))
break;
} else
field_value.assign(ptr, field_end_ptr - ptr);
// add the field to the query dictionary
dict.insert(std::make_pair(field_name, field_value));
found_parameter = true;
} while (false);
// skip ahead to next field
parse_state = MP_PARSE_START;
ptr = next_ptr;
Expand Down
20 changes: 15 additions & 5 deletions tests/http_parser_tests.cpp
Expand Up @@ -259,22 +259,32 @@ BOOST_AUTO_TEST_CASE(testParseMultipartFormData)
"\r\n"
"a\r\n"
"------WebKitFormBoundarynqrI4c1BfROrEpu7\r\n"
"Content-Disposition: form-data; name=\"skipme\"\r\n"
"Content-Disposition: form-data; name=\"donotskipme\"\r\n"
"content-type: application/octet-stream\r\n"
"\r\n"
"SKIP ME!\r\n"
"DO NOT SKIP ME!\r\n"
"------WebKitFormBoundarynqrI4c1BfROrEpu7\r\n"
"Content-Disposition: form-data; name=\"funny$field2\"\r\n"
"\r\n"
"funky test!\r\n"
"------WebKitFormBoundarynqrI4c1BfROrEpu7--");
ihash_multimap params;
BOOST_REQUIRE(http::parser::parse_multipart_form_data(params, "multipart/form-data; boundary=----WebKitFormBoundarynqrI4c1BfROrEpu7", FORM_DATA));
BOOST_CHECK_EQUAL(params.size(), 4UL);
BOOST_CHECK_EQUAL(params.size(), 5UL);
ihash_multimap::const_iterator i;

i = params.find("skipme");
BOOST_REQUIRE(i == params.end());
i = params.find("donotskipme");
BOOST_REQUIRE(i != params.end());
BOOST_CHECK_EQUAL(i->second.substr(0, 39), "data:application/octet-stream; base64, ");
char buf[256];
std::size_t size;
std::string content_type;
http::parser::base64_2binary(buf, 256, size, content_type, i->second);
BOOST_REQUIRE(size == 15);
BOOST_CHECK_EQUAL(content_type, "application/octet-stream");
buf[size] = '\0';
BOOST_CHECK_EQUAL(buf, "DO NOT SKIP ME!");


i = params.find("field1");
BOOST_REQUIRE(i != params.end());
Expand Down

0 comments on commit f9075e7

Please sign in to comment.