Skip to content
Browse files

Update Facebook webchat analyzer for Bro 2.x.

This revamped version of facebook.bro uses the new bodies.bro script to keep
track of HTTP bodies. The previous version had a tightly coupled implementation
of the two rather disjoint concepts. Now, facebook.bro is much cleaner and
focuses only on the task it needs to get done, which is parsing HTTP bodies.

With the new Bro 2.x logging framework, the output of this script is now a
separate stream that logs into facebook.log. Here is some example output:

    #separator \x09
    #path   facebook
    #fields timestamp       chat_from       chat_to chat_msg
    #types  string  string  string  string
    1303218454567   Mondo Cheeze    Udder Kaos      So I need the URL, dude.  What is it?
    1303218465938   Udder Kaos      Mondo Cheeze    the URL?
    1303218474259   Mondo Cheeze    Udder Kaos      Yeah for the secret image
    1303218481721   Udder Kaos      Mondo Cheeze    ok lemme see
    ...
  • Loading branch information...
1 parent 7ef3d73 commit 6e1bc78eae3354e726dd39599e65dd6d44d31d0f @mavam committed
Showing with 73 additions and 159 deletions.
  1. +73 −159 facebook.bro
View
232 facebook.bro
@@ -1,188 +1,102 @@
-#
-# A Facebook analysis script.
-#
-# The script parses the HTTP body of Facebook JSON messages and reconstructs a
-# stream of chat messages from it.
-#
-
-@load http-request
-@load http-reply
-
-module HTTP;
+##!
+##! A Facebook analysis script.
+##!
+##! The script parses the HTTP body of Facebook JSON messages and reconstructs
+##! a stream of chat messages from it.
+
+# TODO:
+# - Add more message types.
+# - Parse other non-chat messages as well and establish the notion of a
+# session. To this end, we can use the actual closing message from Facebook
+# itself, which looks like this:
+# for (;;);{"t":"msg","c":"p_1111111111","s":18,"ms":[{
+# "id":111111111111111, "window_id":1111111111,
+# "type":"close_chat"}]}"
+
+@load bodies
+
+redef HTTP::hook_reply_bodies = T;
+redef HTTP::hook_host_pattern = /[0-9]+\.channel\.facebook\.com/;
+
+module Facebook;
+
+export {
+ redef enum Log::ID += { LOG };
+
+ ## Describes one logged chat message; its fields are the log stream's columns.
+ type Info: record {
+ timestamp: string &log;
+ chat_from: string &log;
+ chat_to: string &log;
+ chat_msg: string &log;
+ };
-export
-{
- redef enum Notice +=
- {
- Facebook_Chat_Start,
- Facebook_Chat_Message,
- Facebook_Chat_End
+ ## The types of AJAX messages.
+ type MessageType: enum {
+ CHAT ##< A webchat message.
};
- # A chat message
+ ## A chat message
type ChatMessage: record
{
- timestamp: string; # Message timestamp.
- from: string; # Name of the sender
- to: string; # Name of the recipient.
- text: string; # The actual message.
+ msg_type: MessageType; ##< Message type.
+ timestamp: string; ##< Message timestamp.
+ from: string; ##< Name of the sender
+ to: string; ##< Name of the recipient.
+ text: string; ##< The actual message.
};
- type ChatSession: record
- {
- start: time; # Unix timestamp of first message.
- end: time; # Unix timestamp of last message.
- n: count; # Total number of messages in session.
- };
+ global log_facebook: event(rec: Info);
}
-type HTTPBody: record
-{
- content_length: count; # Value from the CONTENT-LENGTH header.
- size: count; # Current size of accumulated body.
- data: string; # Body data.
-};
+event bro_init()
+ {
+ Log::create_stream(Facebook::LOG, [$columns=Info, $ev=log_facebook]);
+ }
-const facebook_log = open_log_file("facebook") &redef;
-
-# If a HTTP body spans multiple events, this buffer accumulates the chunks.
-global bodies: table[conn_id] of HTTPBody;
-
-# Chats index by HTTP session ID.
-global chats: table[conn_id] of ChatSession;
-
-function new_chat_session() : ChatSession
-{
- local s: ChatSession;
-
- s$start = network_time();
- s$end = s$start;
- s$n = 0;
-
- return s;
-}
-
-function new_http_body() : HTTPBody
-{
- local body: HTTPBody;
-
- body$size = 0;
- body$data = "";
-
- return body;
-}
-
-# Extract text between two quotes.
+## Extract text between two quotes.
function extract_quoted(str: string) : string
-{
- local q = find_last(str, /\"([^\"]|\\\")*\"$/);
- return split(q, /\"/)[2];
-}
+ {
+ local q = find_last(str, /\"([^\"]|\\\")*\"$/); # "
+ return split(q, /\"/)[2]; # "
+ }
+## Create a webchat message from JSON data.
function parse_fb_message(data: string) : ChatMessage
-{
+ {
local msg: ChatMessage;
- local array = split(data, /,\"/); # "
- for (i in array)
- {
+ local array = split(data, /,\"/); # "
+ for ( i in array )
+ {
local val = array[i];
- if (strstr(val, "time\":") > 0)
+ if ( strstr(val, "time\":") > 0 )
msg$timestamp = find_last(val, /[0-9]{13}/);
- else if (strstr(val, "from_name\":\"") > 0)
+ else if ( strstr(val, "from_name\":\"") > 0 )
msg$from = extract_quoted(val);
- else if (strstr(val, "to_name\":\"") > 0)
+ else if ( strstr(val, "to_name\":\"") > 0 )
msg$to = extract_quoted(val);
- else if (strstr(val, "\"msg\":{\"text\":\"") > 0)
+ else if ( strstr(val, "\"msg\":{\"text\":\"") > 0 )
msg$text = extract_quoted(val);
- }
+ }
return msg;
-}
-
-function report_message(c: connection, msg: ChatMessage)
-{
- local format = "%s (%s -> %s) %s";
- local message = fmt(format, msg$timestamp, msg$from, msg$to, msg$text);
- NOTICE([$note=Facebook_Chat_Message, $conn=c, $msg = message]);
- print facebook_log, message;
-}
-
-# For requests, look at the HOST header to determine whether we're expecting a
-# potential chat message. For replies, record the size of the HTTP entity to
-# make sure we reassemble it completely.
-event http_header(c: connection, is_orig: bool, name: string, value: string)
-{
- local id = c$id;
- if (is_orig && name == "HOST" && /[0-9]+\.channel\.facebook\.com/ in value)
- {
- if (id !in chats)
- chats[id] = new_chat_session();
- }
- else if (! is_orig && name == "CONTENT-LENGTH")
- {
- if (id !in chats)
- return;
-
- # If we have the current ID is still in the message buffer when seeing
- # a new reply, it means the the previous message has not been received
- # in its entirety. That is, there is some partial HTTP body hanging in
- # the buffer that we could try to parse at some point.
- if (id in bodies)
- print fmt("warning: ignoring incomplete HTTP body in %s", id);
-
- bodies[id] = new_http_body();
- bodies[id]$content_length = to_count(value);
}
-}
-
-# Reassemble the HTTP body of replies and look for Facebook chat messages.
-event http_entity_data(c: connection, is_orig: bool, length: count,
- data: string)
-{
- local id = c$id;
- if (id !in bodies)
- return;
-
- local body = bodies[id];
-
- body$data = cat(body$data, data);
- if (body$size + length < body$content_length)
+## Look for Facebook chat messages in HTTP bodies and log them.
+event http_body(c: connection, is_orig: bool, data: string, size: count)
{
- # Accumulate partial HTTP body data and return.
- body$size += length;
- return;
- }
-
- local chat = chats[id];
- chat$end = network_time();
- ++chat$n;
-
# Hackish heuristic that indicates we're dealing with a chat message.
- if (/^for \(;;\);\{\"t\":\"msg\".*text\":\"/ in body$data)
- {
- local msg = parse_fb_message(body$data);
- report_message(c, msg);
- }
-
- delete bodies[id];
-}
-
-# Evict chat session state.
-# TODO: it would be nice to use the actual closing message from Facebook
-# itself, which looks similar to:
-# for (;;);{"t":"msg","c":"p_1111111111","s":18,"ms":[{"id":111111111111111,
-# "window_id":1111111111,"type":"close_chat"}]}"
-event connection_state_remove(c: connection)
-{
- local id = c$id;
- if (id !in chats)
+ if (/^for \(;;\);\{\"t\":\"msg\".*text\":\"/ !in data) #"
return;
- local session = chats[id];
-# print fmt("chat session ended (%s, %d messages)",
-# session$end - session$start, session$n);
+ local msg = parse_fb_message(data);
- delete chats[id];
-}
+ local i: Info;
+ i$timestamp = msg$timestamp;
+ i$chat_from = msg$from;
+ i$chat_to = msg$to;
+ i$chat_msg = msg$text;
+
+ Log::write(Facebook::LOG, i);
+ }

0 comments on commit 6e1bc78

Please sign in to comment.
Something went wrong with that request. Please try again.