Skip to content

Commit

Permalink
Store POST request data as JSON in the database.
Browse files Browse the repository at this point in the history
Incorporated some code from: https://github.com/redline13/selenium-jmeter

Handle various content encodings for the POST data (encType).
This includes `text/plain`, `application/x-www-form-urlencoded` and
`multipart/form-data`.

Store "request headers from upload stream" in the database.
We add the additional headers found in the POST body to the existing headers column.

Discard OCSP (POST) requests to prevent noise.

Add tests for form submissions with different encodings by JavaScript.
Add tests for making POST requests with AJAX (jQuery) and FormData.
Add test for byte array POST data.

Handle exception when looking up the `Content-Type` response header.

Add shared/utils.js file. We can add common test functions to this file.

Fix some style inconsistencies.
  • Loading branch information
gunesacar committed Dec 28, 2016
1 parent d3a9445 commit f4cc4ef
Show file tree
Hide file tree
Showing 10 changed files with 471 additions and 12 deletions.
5 changes: 5 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ https://github.com/EFForg/privacybadgerfirefox
Copyright © 2015 Electronic Frontier Foundation and other contributors
Licensed GPLv3+

Incorporating code from selenium-jmeter,
https://github.com/redline13/selenium-jmeter
By Richard Friedman
Licensed GPLv3+

Text of GPLv3 License:
======================
GNU GENERAL PUBLIC LICENSE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@ CREATE TABLE IF NOT EXISTS http_requests(
loading_href TEXT,
req_call_stack TEXT,
content_policy_type INTEGER NOT NULL,
time_stamp TEXT NOT NULL
time_stamp TEXT NOT NULL,
post_body TEXT
);
46 changes: 42 additions & 4 deletions automation/Extension/firefox/lib/http-instrument.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ const {Cc, Ci, CC, Cu, components} = require("chrome");
const events = require("sdk/system/events");
const data = require("sdk/self").data;
var loggingDB = require("./loggingdb.js");
var httpPostParser = require("./http-post-parser.js");

var BinaryInputStream = CC('@mozilla.org/binaryinputstream;1',
'nsIBinaryInputStream', 'setInputStream');
Expand All @@ -23,7 +24,7 @@ converter.charset = "UTF-8";
* HTTP Request Handler and Helper Functions
*/

function get_stack_trace_str(){
function get_stack_trace_str() {
// return the stack trace as a string
// TODO: check if http-on-modify-request is a good place to capture the stack
// In the manual tests we could capture exactly the same trace as the
Expand Down Expand Up @@ -69,20 +70,50 @@ var httpRequestHandler = function(reqEvent, crawlID) {
update["method"] = loggingDB.escapeString(requestMethod);

var referrer = "";
if(httpChannel.referrer)
if (httpChannel.referrer)
referrer = httpChannel.referrer.spec;
update["referrer"] = loggingDB.escapeString(referrer);

var current_time = new Date();
update["time_stamp"] = current_time.toISOString();

var encodingType = "";
var headers = [];
var isOcsp= false;
httpChannel.visitRequestHeaders({visitHeader: function(name, value) {
var header_pair = [];
header_pair.push(loggingDB.escapeString(name));
header_pair.push(loggingDB.escapeString(value));
headers.push(header_pair);
if (name == "Content-Type") {
encodingType = value;
if (encodingType.indexOf("application/ocsp-request") != -1)
isOcsp = true;
}
}});

if (requestMethod == 'POST' && !isOcsp) { // don't process OCSP requests
reqEvent.subject.QueryInterface(components.interfaces.nsIUploadChannel);
if (reqEvent.subject.uploadStream) {
reqEvent.subject.uploadStream.QueryInterface(components.interfaces.nsISeekableStream);
var postParser = new httpPostParser.HttpPostParser(reqEvent.subject.uploadStream);
var postObj = postParser.parsePostRequest(encodingType);

// Add (POST) request headers from upload stream
if ("post_headers" in postObj) {
for (var name in postObj["post_headers"]) {
var header_pair = [];
header_pair.push(loggingDB.escapeString(name));
header_pair.push(loggingDB.escapeString(postObj["post_headers"][name]));
headers.push(header_pair);
}
}
// we store POST body in JSON format, except when it's a string without a (key-value) structure
if ("post_body" in postObj)
update["post_body"] = postObj["post_body"];
}
}

update["headers"] = JSON.stringify(headers);

// Check if xhr
Expand Down Expand Up @@ -262,7 +293,14 @@ function isJS(httpChannel) {
contentPolicyType != 16 && // websocket
contentPolicyType != 19) // beacon response
return false;
var contentType = httpChannel.getResponseHeader("Content-Type");

var contentType;
try {
contentType = httpChannel.getResponseHeader("Content-Type");
}catch (e) { // Content-Type may not be present
contentType = "";
}

if (contentType && contentType.toLowerCase().includes('javascript'))
return true;
var path = httpChannel.URI.path;
Expand Down Expand Up @@ -295,7 +333,7 @@ var httpResponseHandler = function(respEvent, isCached, crawlID, saveJavascript)
update["method"] = loggingDB.escapeString(requestMethod);

var referrer = "";
if(httpChannel.referrer)
if (httpChannel.referrer)
referrer = httpChannel.referrer.spec;
update["referrer"] = loggingDB.escapeString(referrer);

Expand Down
259 changes: 259 additions & 0 deletions automation/Extension/firefox/lib/http-post-parser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
// Incorporates code from: https://github.com/redline13/selenium-jmeter/blob/6966d4b326cd78261e31e6e317076569051cac37/content/library/recorder/HttpPostParser.js

const {Cc, Ci, CC, Cu, components} = require("chrome");
var loggingDB = require("./loggingdb.js");

/**
 * Parser for the upload stream of an HTTP POST request.
 *
 * Wraps `stream` in a scriptable input stream so it can be read from script,
 * and initialises the state the parsing methods fill in.
 *
 * @param {Object} stream - upload stream of the request; the methods of this
 *   object call seek()/tell() on it, so it must already support those
 *   (the caller in http-instrument.js queries nsISeekableStream first).
 */
var HttpPostParser = function(stream) {
  // Raw stream (used for seek/tell) and a scriptable wrapper (used for reading)
  this.seekablestream = stream;
  this.stream = components.classes["@mozilla.org/scriptableinputstream;1"].createInstance(components.interfaces.nsIScriptableInputStream);
  this.stream.init(this.seekablestream);

  this.postBody = "";
  // Lines found in the header section that are not in "Name: value" form
  this.postLines = [];
  // Header name -> value map. A plain object rather than an Array, because it
  // is only ever used as a string-keyed dictionary.
  this.postHeaders = {};
  // Check if the stream has headers
  this.hasheaders = false;
  // Offset of the body in the stream; -1 means "not determined yet"
  this.body = 0;
  if (this.seekablestream instanceof components.interfaces.nsIMIMEInputStream) {
    this.seekablestream.QueryInterface(components.interfaces.nsIMIMEInputStream);
    this.hasheaders = true;
    this.body = -1;
  } else if (this.seekablestream instanceof components.interfaces.nsIStringInputStream) {
    this.seekablestream.QueryInterface(components.interfaces.nsIStringInputStream);
    this.hasheaders = true;
    this.body = -1;
  }
};

/**
 * Move the underlying seekable stream back to offset 0.
 * Calls seek(0, 0) on it (the first 0 is presumably the "from start"
 * whence value, the second the offset).
 */
HttpPostParser.prototype.rewind = function() {
  var seekable = this.seekablestream;
  seekable.seek(0, 0);
};

/**
 * Report the current position of the underlying seekable stream.
 *
 * @returns whatever the stream's own tell() returns.
 */
HttpPostParser.prototype.tell = function() {
  var position = this.seekablestream.tell();
  return position;
};

/**
 * Read one line from the scriptable stream, one character at a time.
 *
 * Carriage returns are dropped and the terminating line feed is consumed
 * but not returned. Reading also stops once the number of characters that
 * were available when the call started has been read.
 *
 * @returns {string} the line without its line ending ("" for an empty line
 *   or when nothing is available).
 */
HttpPostParser.prototype.readLine = function() {
  var text = "";
  var remaining = this.stream.available();
  while (remaining > 0) {
    remaining--;
    var ch = this.stream.read(1);
    if (ch == '\n') {
      break;
    }
    if (ch != '\r') {
      text += ch;
    }
  }
  return text;
};

/**
 * Read the header section at the start of the upload stream.
 *
 * Does nothing unless `hasheaders` is set. Otherwise rewinds the stream and
 * reads lines until the first empty one: "Name: value" lines are stored in
 * `postHeaders`, every other line is appended to `postLines`.
 * Side effect: `body` is set to the stream position reached afterwards.
 */
HttpPostParser.prototype.headers = function() {
  if (!this.hasheaders) {
    return;
  }
  this.rewind();
  for (var current = this.readLine(); current; current = this.readLine()) {
    var parsed = current.match(/^([^:]+):\s?(.*)/);
    if (parsed === null) {
      // not in "Name: value" form, keep the raw line instead
      this.postLines.push(current);
      continue;
    }
    this.postHeaders[parsed[1]] = parsed[2];
  }
  this.body = this.tell();
};

/**
 * Convert a text/plain form body to application/x-www-form-urlencoded so
 * that both encodings can be parsed in one place.
 * See, https://developer.mozilla.org/en-US/docs/Web/API/XMLHttpRequest/Using_XMLHttpRequest#Using_nothing_but_XMLHttpRequest
 *
 * We convert from (text/plain)...
 *   foo=bar
 *   baz=The first line.
 *   The second line.
 * to (application/x-www-form-urlencoded):
 *   foo=bar&baz=The%20first%20line.%0D%0AThe%20second%20line.
 *
 * Each line containing "=" starts a new pair, split at its first "=". A line
 * without "=" is treated as a continuation of the previous value and joined
 * to it with CRLF. Keys and values are percent-encoded separately, so the
 * "=" and "&" separators stay literal while the same characters inside a
 * value are escaped.
 *
 * @param {string} postBody - raw text/plain request body
 * @returns {string} the url-encoded equivalent
 */
HttpPostParser.prototype.convertTextPlainToUrlEncoded = function(postBody) {
  var pairs = [];
  for (var rawLine of postBody.split("\n")) {
    var line = rawLine.trim();
    var eqIndex = line.indexOf("=");
    if (eqIndex != -1) {
      pairs.push(encodeURIComponent(line.substring(0, eqIndex)) + "=" +
                 encodeURIComponent(line.substring(eqIndex + 1)));
    } else if (pairs.length > 0) {
      pairs[pairs.length - 1] += encodeURIComponent("\r\n" + line);
    } else if (line) {
      // A leading line without "=" has no pair to continue; keep it as a bare
      // token instead of gluing it to the string "undefined".
      pairs.push(encodeURIComponent(line));
    }
  }
  return pairs.join("&");
};

/**
 * Parse a text/plain or application/x-www-form-urlencoded POST body.
 *
 * @param {string} formData - request body
 * @param {string} encodingType - value of the Content-Type header
 * @returns {string} a JSON object string of the decoded key/value pairs.
 *   The body is returned unchanged when it contains no "=" at all, or when
 *   it cannot be decoded.
 */
HttpPostParser.prototype.parseEncodedFormData = function(formData, encodingType) {
  if (formData.indexOf("=") == -1) // not key=value form
    return formData;

  var rawBody = formData;
  try {
    if (encodingType.indexOf("text/plain") != -1)
      formData = this.convertTextPlainToUrlEncoded(formData);

    // Split into pairs first and decode afterwards: decoding the whole body
    // up front would turn an encoded "&" or "=" inside a value (%26, %3D)
    // into a separator and produce bogus pairs.
    var decode = function(component) {
      return decodeURIComponent(component.replace(/\+/g, " "));
    };
    var obj = {};
    for (var pair of formData.split("&")) {
      var eqIndex = pair.indexOf("=");
      if (eqIndex <= 0) // no "=" or empty key: not a key=value pair
        continue;
      obj[decode(pair.substring(0, eqIndex))] = decode(pair.substring(eqIndex + 1));
    }
    return JSON.stringify(obj);
  } catch (e) {
    console.log("Exception: Cannot parse POST data:", e, encodingType, "formData:", rawBody);
    return rawBody; // return the original body if we can't decode
  }
};


/**
 * Read the upload stream and return its headers and parsed body.
 *
 * @param {string} [encodingType] - Content-Type of the request. When empty or
 *   missing, the Content-Type found in the "request headers from upload
 *   stream" is used instead, if there is one.
 * @returns {Object} `{post_headers, post_body}`, where post_body is the
 *   escaped result of parseMultiPartData / parseEncodedFormData, or `{}` if
 *   reading the stream threw.
 */
HttpPostParser.prototype.parsePostRequest = function(encodingType) {
  try {
    this.parseStream();
  } catch (e) {
    console.log( "Exception: Failed to parse POST", e );
    return {};
  }

  var postBody = this.postBody;
  var postHeaders = this.postHeaders; // request headers from upload stream
  // See, http://stackoverflow.com/questions/16548517/what-is-request-headers-from-upload-stream

  // post lines are the lines of the header section that are not in "Name: value" form
  var postLinesStr = this.postLines.join('\r\n');

  // add encodingType found in "request headers from upload stream"
  if (!encodingType && postHeaders && ("Content-Type" in postHeaders))
    encodingType = postHeaders["Content-Type"];
  // Still unknown: fall back to "" so the indexOf checks below (and in
  // parseEncodedFormData) don't throw on undefined/null.
  encodingType = encodingType || "";

  var isMultiPart = encodingType.indexOf("multipart/form-data") != -1;

  var formData = postBody;
  if (!postBody && postLinesStr) {
    // We've never observed a case where the post body is empty but the post lines are not.
    // The jmeter code handles the case:
    // https://github.com/redline13/selenium-jmeter/blob/6966d4b326cd78261e31e6e317076569051cac37/content/library/selenium-jmeter.js#L67-L72
    // We add the following string to debug:
    formData = postLinesStr + "__DEBUG_EMPTY_POST_BODY_TO_LINES_FALLBACK__";
    // TODO: Remove the debug string after testing with a small scale crawl.
  }

  var jsonFormData = isMultiPart ?
      this.parseMultiPartData(formData, encodingType) :
      this.parseEncodedFormData(formData, encodingType);
  return {post_headers: postHeaders, post_body: loggingDB.escapeString(jsonFormData)};
};



/**
 * Parse POST bodies with encType "multipart/form-data"
 * https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types#multipartform-data
 *
 * formData is in the following form:
 *
 *   -----------------------------12972102761018453617355621459
 *   Content-Disposition: form-data; name="email"
 *
 *   test@example.com
 *   -----------------------------12972102761018453617355621459
 *   Content-Disposition: form-data; name="username"
 *
 *   name surname+
 *   -----------------------------12972102761018453617355621459--
 *
 * The part delimiter is "--" followed by the `boundary` parameter of the
 * content type, when the body starts with it. Otherwise the first line is
 * used as the delimiter if it starts with the 29 dashes that Firefox-style
 * boundaries begin with.
 *
 * @param {string} formData - multipart request body
 * @param {string} encodingType - Content-Type value, e.g.
 *   "multipart/form-data; boundary=..."
 * @returns {string} JSON object string mapping field names to values, or
 *   the unparsed formData when no delimiter can be determined.
 */
HttpPostParser.prototype.parseMultiPartData = function(formData, encodingType) {
  var firstLine = formData.split("\r\n", 1)[0];
  var delimiter = "";

  var boundaryParam = /boundary=(?:"([^"]+)"|([^;\s]+))/i.exec(encodingType || "");
  if (boundaryParam) {
    var declared = "--" + (boundaryParam[1] || boundaryParam[2]);
    if (firstLine.startsWith(declared))
      delimiter = declared;
  }
  if (!delimiter && firstLine.startsWith("-----------------------------"))
    delimiter = firstLine;
  if (!delimiter)
    return formData; // return unparsed data, if we fail to find the boundary string

  var formVars = {};
  for (var part of formData.split(delimiter)) {
    // declared locally: the undeclared assignment used before created an
    // implicit global (and throws in strict mode)
    var partData = this.parseSinglePart(part);
    if ("key" in partData && "value" in partData)
      formVars[partData["key"]] = partData["value"];
  }
  return JSON.stringify(formVars);
};


/**
 * Parse a single part of a multipart request body, e.g.:
 *
 *   Content-Disposition: form-data; name="username"
 *
 *   name surname+
 *
 * The field name is taken from the part's first line; all remaining lines,
 * re-joined with CRLF and trimmed, form the value.
 *
 * @param {string} part - text between two boundary strings
 * @returns {Object} `{key, value}`, or `{}` for an empty part, for the "--"
 *   left over after the last boundary, and when no name can be found.
 */
HttpPostParser.prototype.parseSinglePart = function(part) {
  var trimmedPart = part.trim();
  // ignore empty parts or extra characters after the last boundary
  var isIgnorable = !trimmedPart || trimmedPart === "--";
  if (isIgnorable) {
    return {};
  }

  var lines = trimmedPart.split("\r\n");
  var dispositionLine = lines[0];
  var nameMatch = dispositionLine.match(/Content-Disposition:.*;.name="([^"]*)"/);
  if (!nameMatch) {
    console.log("Can't find the POST form data variable name in", trimmedPart);
    return {};
  }

  var remainder = lines.slice(1).join("\r\n");
  return {key: nameMatch[1], value: remainder.trim()};
};

/**
 * Read the request body from the upload stream into `this.postBody`.
 *
 * If the body offset is not known yet, or the stream is not positioned at
 * it, headers() is called first to consume the header section. The rest of
 * the stream is then read character by character, the stream is rewound,
 * and the body is stored with surrounding whitespace removed.
 *
 * @returns {string|undefined} "" if reading the stream throws (postBody is
 *   then left untouched), otherwise nothing.
 */
HttpPostParser.prototype.parseStream = function() {
  // Position the stream to the start of the body
  if (this.body < 0 || this.seekablestream.tell() != this.body) {
    this.headers();
  }

  var size = this.stream.available();
  if (size == 0 && this.body != 0) {
    // whoops, there weren't really headers..
    this.rewind();
    this.hasheaders = false;
    size = this.stream.available();
  }
  var postString = "";
  try {
    // Read inside try/catch to survive the 'NS_BASE_STREAM_CLOSED' exception
    // that may occur here. See bug #188328.
    for (var i = 0; i < size; i++) {
      var c = this.stream.read(1);
      // an empty read is recorded as a NUL character
      postString += c ? c : '\0';
    }
  } catch (ex) {
    console.log("Error parsing the POST request", ex);
    return "";
  } finally {
    this.rewind();
  }

  // trim() also strips the trailing \r\n's. The explicit loop that used to
  // do this compared indexOf("\r\n") with length - 2, which are both -1 for
  // a one-character body, so such a body was wiped out.
  this.postBody = postString.trim();
};

exports.HttpPostParser = HttpPostParser;
Binary file modified automation/Extension/firefox/openwpm.xpi
Binary file not shown.
Loading

0 comments on commit f4cc4ef

Please sign in to comment.