Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit ef325bf
Showing
3 changed files
with
238 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
Copyright (c) 2011, Matthias Vallentin | ||
|
||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are | ||
met: | ||
|
||
* Redistributions of source code must retain the above copyright | ||
notice, this list of conditions and the following disclaimer. | ||
|
||
* Redistributions in binary form must reproduce the above copyright | ||
notice, this list of conditions and the following disclaimer in | ||
the documentation and/or other materials provided with the | ||
distribution. | ||
|
||
* Neither the name of Matthias Vallentin | ||
nor the names of the contributors may be used to endorse or | ||
promote products derived from this software without specific prior | ||
written permission. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A | ||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | ||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
This repository is a mixed bag of Bro scripts that are too specific to be | ||
included in the official [Bro scripts | ||
repository](http://git.bro-ids.org/bro-scripts.git). The scripts are | ||
of expirimental nature and might have a few edges, so you are welcome to ping | ||
me for feedback and clarifications. | ||
|
||
Please see the file `COPYING` for the licence details. | ||
|
||
Documentation | ||
============= | ||
|
||
facebook.bro | ||
------------ | ||
|
||
This script analyses Facebook webchat sessions and extracts messages between | ||
two conversing buddies. [My blog][fb-chat-post] contains a bit more details | ||
about this script. | ||
|
||
[fb-chat-post]: http://matthias.vallentin.net/blog/2011/06/analyzing-facebook-webchat-sessions-with-bro/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
# | ||
# A Facebook analysis script. | ||
# | ||
# The script parses the HTTP body of Facebook JSON messages and reconstructs a | ||
# stream of chat messages from it. | ||
# | ||
|
||
@load http-request | ||
@load http-reply | ||
|
||
module HTTP; | ||
|
||
export | ||
{ | ||
redef enum Notice += | ||
{ | ||
Facebook_Chat_Start, | ||
Facebook_Chat_Message, | ||
Facebook_Chat_End | ||
}; | ||
|
||
# A chat message | ||
type chat_message: record | ||
{ | ||
timestamp: string; # Message timestamp. | ||
from: string; # Name of the sender | ||
to: string; # Name of the recipient. | ||
text: string; # The actual message. | ||
}; | ||
|
||
type chat_session: record | ||
{ | ||
start: time; # Unix timestamp of first message. | ||
end: time; # Unix timestamp of last message. | ||
n: count; # Total number of messages in session. | ||
}; | ||
} | ||
|
||
type http_body: record | ||
{ | ||
content_length: count; # Value from the CONTENT-LENGTH header. | ||
size: count; # Current size of accumulated body. | ||
data: string; # Body data. | ||
}; | ||
|
||
const facebook_log = open_log_file("facebook") &redef; | ||
|
||
# If a HTTP body spans multiple events, this buffer accumulates the chunks. | ||
global bodies: table[conn_id] of http_body; | ||
|
||
# Chats index by HTTP session ID. | ||
global chats: table[conn_id] of chat_session; | ||
|
||
function new_chat_session() : chat_session | ||
{ | ||
local s: chat_session; | ||
|
||
s$start = network_time(); | ||
s$end = s$start; | ||
s$n = 0; | ||
|
||
return s; | ||
} | ||
|
||
function new_http_body() : http_body | ||
{ | ||
local body: http_body; | ||
|
||
body$size = 0; | ||
body$data = ""; | ||
|
||
return body; | ||
} | ||
|
||
# Extract text between two quotes. | ||
function extract_quoted(str: string) : string | ||
{ | ||
local q = find_last(str, /\"([^\"]|\\\")*\"$/); | ||
return split(q, /\"/)[2]; | ||
} | ||
|
||
function parse_fb_message(data: string) : chat_message | ||
{ | ||
local msg: chat_message; | ||
|
||
local array = split(data, /,\"/); # " | ||
for (i in array) | ||
{ | ||
local val = array[i]; | ||
if (strstr(val, "time\":") > 0) | ||
msg$timestamp = find_last(val, /[0-9]{13}/); | ||
else if (strstr(val, "from_name\":\"") > 0) | ||
msg$from = extract_quoted(val); | ||
else if (strstr(val, "to_name\":\"") > 0) | ||
msg$to = extract_quoted(val); | ||
else if (strstr(val, "\"msg\":{\"text\":\"") > 0) | ||
msg$text = extract_quoted(val); | ||
} | ||
|
||
return msg; | ||
} | ||
|
||
function report_message(c: connection, msg: chat_message) | ||
{ | ||
local format = "%s (%s -> %s) %s"; | ||
local message = fmt(format, msg$timestamp, msg$from, msg$to, msg$text); | ||
NOTICE([$note=Facebook_Chat_Message, $conn=c, $msg = message]); | ||
print facebook_log, message; | ||
} | ||
|
||
# For requests, look at the HOST header to determine whether we're expecting a | ||
# potential chat message. For replies, record the size of the HTTP entity to | ||
# make sure we reassemble it completely. | ||
event http_header(c: connection, is_orig: bool, name: string, value: string) | ||
{ | ||
local id = c$id; | ||
if (is_orig && name == "HOST" && /[0-9]+\.channel\.facebook\.com/ in value) | ||
{ | ||
if (id !in chats) | ||
chats[id] = new_chat_session(); | ||
} | ||
else if (! is_orig && name == "CONTENT-LENGTH") | ||
{ | ||
if (id !in chats) | ||
return; | ||
|
||
# If we have the current ID is still in the message buffer when seeing | ||
# a new reply, it means the the previous message has not been received | ||
# in its entirety. That is, there is some partial HTTP body hanging in | ||
# the buffer that we could try to parse at some point. | ||
if (id in bodies) | ||
print fmt("warning: ignoring incomplete HTTP body in %s", id); | ||
|
||
bodies[id] = new_http_body(); | ||
bodies[id]$content_length = to_count(value); | ||
} | ||
} | ||
|
||
# Reassemble the HTTP body of replies and look for Facebook chat messages. | ||
event http_entity_data(c: connection, is_orig: bool, length: count, data: string) | ||
{ | ||
local id = c$id; | ||
if (id !in bodies) | ||
return; | ||
|
||
local body = bodies[id]; | ||
|
||
body$data = cat(body$data, data); | ||
|
||
if (body$size + length < body$content_length) | ||
{ | ||
# Accumulate partial HTTP body data and return. | ||
body$size += length; | ||
return; | ||
} | ||
|
||
local chat = chats[id]; | ||
chat$end = network_time(); | ||
++chat$n; | ||
|
||
# Hackish heuristic that indicates we're dealing with a chat message. | ||
if (/^for \(;;\);\{\"t\":\"msg\".*text\":\"/ in body$data) | ||
{ | ||
local msg = parse_fb_message(body$data); | ||
report_message(c, msg); | ||
} | ||
|
||
delete bodies[id]; | ||
} | ||
|
||
# Evict chat session state. | ||
# TODO: it would be nice to use the actual closing message from Facebook | ||
# itself, which looks similar to: | ||
# for (;;);{"t":"msg","c":"p_1111111111","s":18,"ms":[{"id":111111111111111, | ||
# "window_id":1111111111,"type":"close_chat"}]}" | ||
event connection_state_remove(c: connection) | ||
{ | ||
local id = c$id; | ||
if (id !in chats) | ||
return; | ||
|
||
local session = chats[id]; | ||
# print fmt("chat session ended (%s, %d messages)", | ||
# session$end - session$start, session$n); | ||
|
||
delete chats[id]; | ||
} |