Skip to content
This repository has been archived by the owner on May 25, 2021. It is now read-only.

Commit

Permalink
re-add traffic correction, prepare LogMessage class to be configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
niwo committed Apr 25, 2014
1 parent 7901eb1 commit 5b1d146
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 45 deletions.
21 changes: 20 additions & 1 deletion lib/sms-logparser/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,18 @@ def version
desc: "Accumulate and cache results and send totals"
option :concurrency, type: :numeric, default: 4, aliases: %w(-C),
desc: "How many threads to use in parallel when sending cached results"
option :webcast_traffic_correction, type: :numeric, aliases: %w(-W),
desc: "Correction factor for webcast traffic"
option :mobile_traffic_correction, type: :numeric, aliases: %w(-M),
desc: "Correction factor for mobile traffic"
option :podcast_traffic_correction, type: :numeric, aliases: %w(-P),
desc: "Correction factor for podcast traffic"
def parse
start_message = "Parser started"
start_message += options[:simulate] ? " in simulation mode." : "."
logger.debug("Parser options: #{options.inspect}")
logger.info(start_message)
parser = Parser.new(options)
cache = DataCache.new if options[:accumulate]
mysql = Mysql.new(options)
if !options[:simulate] && mysql.parser_running?
Expand All @@ -60,7 +67,7 @@ def parse
mysql.get_entries(last_id: state[:last_event_id], limit: options[:limit]) do |entries|
logger.info { "Getting log messages from database..." }
entries.each do |entry|
Parser.extract_data_from_msg(entry['Message']) do |data|
parser.extract_data_from_msg(entry['Message']) do |data|
if data.size > 0
data.each do |data_entry|
if options[:accumulate]
Expand Down Expand Up @@ -189,6 +196,18 @@ def logger
SmsLogparser::Loggster.instance.set_log_device options[:logfile]
end

# Pushes the traffic correction factors given on the command line onto
# SmsLogparser::Parser through its class-level writers. A factor is only
# assigned when the corresponding option is set (truthy), so omitted
# options leave whatever value the parser already holds.
#
# NOTE(review): the Parser class visible in this change takes its options
# through Parser.new(options) and shows no class-level
# *_traffic_correction= writers -- confirm that they exist (or that this
# helper is still needed) before relying on it.
#
# Returns the mobile correction factor when it was set, otherwise nil.
def set_parser_options
  webcast_factor = options[:webcast_traffic_correction]
  SmsLogparser::Parser.webcast_traffic_correction = webcast_factor if webcast_factor

  podcast_factor = options[:podcast_traffic_correction]
  SmsLogparser::Parser.podcast_traffic_correction = podcast_factor if podcast_factor

  mobile_factor = options[:mobile_traffic_correction]
  SmsLogparser::Parser.mobile_traffic_correction = mobile_factor if mobile_factor
end

def verbose_parser_output(entry_id, data, url, status)
logger.debug {
"Parsing data for #{entry_id} (#{data.map{|k,v| "#{k}=\"#{v || '-'}\""}.join(" ") || ''})"
Expand Down
29 changes: 19 additions & 10 deletions lib/sms-logparser/log_message.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,44 @@ module SmsLogparser
class LogMessage

attr_reader :message

def initialize(message)
# reove double slashes from message

MOBILE_AGENT = '.*(Mobi|IEMobile|Mobile Safari|iPhone|iPod|iPad|Android|BlackBerry|Opera Mini).*'
PODCAST_AGENT = '.*(iTunes).*'
FILE_EXCLUDE = 'detect.mp4'
STATUS_MATCH = '200|206'

# Wraps a raw log line and records the patterns used to classify it.
#
# message - the log line; every run of consecutive slashes is collapsed
#           into a single "/" (this also applies to "//" inside URLs
#           contained in the line).
# options - optional overrides, each a pattern source string that is later
#           interpolated into a regular expression:
#             :mobile_agent_regex  (default MOBILE_AGENT)
#             :podcast_agent_regex (default PODCAST_AGENT)
#             :file_exclude_regex  (default FILE_EXCLUDE)
#             :status_match_regex  (default STATUS_MATCH)
def initialize(message, options = {})
  @message = message.squeeze('/')

  mobile, podcast, excluded, statuses = options.values_at(
    :mobile_agent_regex,
    :podcast_agent_regex,
    :file_exclude_regex,
    :status_match_regex
  )

  # A missing (nil/false) override falls back to the class default.
  @mobile_agent  = mobile   || MOBILE_AGENT
  @podcast_agent = podcast  || PODCAST_AGENT
  @file_exclude  = excluded || FILE_EXCLUDE
  @status_match  = statuses || STATUS_MATCH
end

def self.match?(message)
if match = message.match(/\/content\/\d+\/\d+\/\d+\/(\S*).+(200|206)/)
def match?
if match = @message.match(/\/content\/\d+\/\d+\/\d+\/(\S*).+(#{@status_match})/)
# ignore detect.mp4
return true unless match[1] =~ /detect.mp4/i
return true unless match[1] =~ /#{@file_exclude}/i
end
false
end

# see https://developer.mozilla.org/en-US/docs/Browser_detection_using_the_user_agent
# for mobile browser detection
def self.get_type(user_agent)
def self.get_type(user_agent, mobile_agent = MOBILE_AGENT, podcast_agent = PODCAST_AGENT)
case user_agent
when /.*(iTunes).*/i
when /#{podcast_agent}/i
'PODCAST'
when /.*(Mobi|IEMobile|Mobile Safari|iPhone|iPod|iPad|Android|BlackBerry|Opera Mini).*/
when /#{mobile_agent}/i
'MOBILE'
else
'WEBCAST'
end
end

def type
LogMessage.get_type(user_agent)
LogMessage.get_type(user_agent, @mobile_agent, @podcast_agent)
end

def match
Expand Down
31 changes: 24 additions & 7 deletions lib/sms-logparser/parser.rb
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
module SmsLogparser
module Parser
class Parser

module_function
# Creates a parser and keeps +options+ for its instance methods.
#
# options - Hash of settings (defaults to an empty Hash). The visible code
#           hands it to LogMessage.new and reads the
#           :webcast_traffic_correction, :mobile_traffic_correction and
#           :podcast_traffic_correction keys from it.
def initialize(options = {})
@options = options
end

# Returns SmsLogparser::Loggster.instance, the logger object the parser
# writes its debug and warning messages to.
def logger
SmsLogparser::Loggster.instance
end

def extract_data_from_msg(message)
data = []
if LogMessage.match?(message)
log_message = LogMessage.new(message, @options)
if log_message.match?
logger.debug { "Parser MATCH: #{message}" }
log_message = LogMessage.new(message)
if log_message.match
data << Parser.extract_usage_data(log_message)
data << Parser.extract_visit(log_message)
data << extract_usage_data(log_message)
data << extract_visit(log_message)
data.compact! # remove nil values
else
logger.warn { "Can't extract data from message: #{message}" }
Expand All @@ -27,9 +29,10 @@ def extract_data_from_msg(message)
end

def extract_usage_data(log_message)
traffic = log_message.bytes * traffic_correction_factor(log_message.type)
log_message.account_info.merge(
type: "TRAFFIC_#{log_message.type}",
value: log_message.bytes
value: traffic.round(0)
)
end

Expand All @@ -51,5 +54,19 @@ def extract_visit(log_message)
visit_data || nil
end

# Looks up the correction factor that the byte count of a log message is
# multiplied with, depending on its traffic type.
#
# traffic_type - 'WEBCAST', 'MOBILE' or 'PODCAST'; any other value is
#                treated as "no correction".
#
# Returns the configured factor converted with #to_f, or 1.0 when the
# type is unknown or no (truthy) factor is configured for it.
def traffic_correction_factor(traffic_type)
  option_key =
    case traffic_type
    when 'WEBCAST' then :webcast_traffic_correction
    when 'MOBILE'  then :mobile_traffic_correction
    when 'PODCAST' then :podcast_traffic_correction
    end

  # Unknown types never touch @options.
  return 1.0 unless option_key

  configured = @options[option_key]
  configured ? configured.to_f : 1.0
end

end # class
end # module
2 changes: 1 addition & 1 deletion lib/sms-logparser/version.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module SmsLogparser
VERSION = "0.16.3"
VERSION = "0.17.0"
end
24 changes: 12 additions & 12 deletions spec/log_message_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,40 @@

%w(f4v flv mp4 mp3 ts m3u8 jpg js css m4a png sid).each do |extension|
it "matches #{extension} files" do
SmsLogparser::LogMessage.match?(
SmsLogparser::LogMessage.new(
"GET /content/2/719/54986/file.#{extension} HTTP/1.1\" 200 6741309 "
).must_equal true
).match?.must_equal true
end
end

%w(200 206).each do |status|
it "does match status code #{status}" do
SmsLogparser::LogMessage.match?(
SmsLogparser::LogMessage.new(
"GET /content/2/719/54986/file.mp4 HTTP/1.1\" #{status} 50000 "
).must_equal true
).match?.must_equal true
end
end

%w(404 500 304).each do |status|
it "does not match status code #{status}" do
SmsLogparser::LogMessage.match?(
SmsLogparser::LogMessage.new(
"GET /content/2/719/54986/file.mp4 HTTP/1.1\" #{status} 50000 "
).must_equal false
).match?.must_equal false
end
end

%w(contents public index assets).each do |dir|
it "does not match directories other than /content" do
SmsLogparser::LogMessage.match?(
SmsLogparser::LogMessage.new(
"GET /#{dir}/2/719/54986/file.mp4 HTTP/1.1\" 200 50000 "
).must_equal false
).match?.must_equal false
end
end

it "does not match for 'detect.mp4' files" do
SmsLogparser::LogMessage.match?(
SmsLogparser::LogMessage.new(
"GET /content/2/719/54986/detect.mp4 HTTP/1.1\" 200 128 "
).must_equal false
).match?.must_equal false
end

[
Expand All @@ -48,15 +48,15 @@
"Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0)",
"Mozilla/5.0 (iPhone; U; CPU iPhone OS 5_1_1 like Mac OS X; en) AppleWebKit/534.46.0 (KHTML, like Gecko) CriOS/19.0.1084.60 Mobile/9B206 Safari/7534.48.3"
].each do |mobile_agent|
it "traffic type for mobile user agents is TRAFFIC_MOBILE (#{mobile_agent})" do
it "type for mobile user agents is MOBILE (#{mobile_agent})" do
SmsLogparser::LogMessage.get_type(mobile_agent).must_equal "MOBILE"
end
end

[
'127.0.0.1 - - [13/Apr/2014:05:33:23 +0200] "GET /content/51/52/42481/simvid_1.mp4 HTTP/1.1" 206 7865189 "-" "iTunes/11.1.5 (Windows; Microsoft Windows 7 Home Premium Edition Service Pack 1 (Build 7601)) AppleWebKit/537.60.11"'
].each do |podcast_agent|
it "traffic type for mobile user agents is TRAFFIC_PODCAST (#{podcast_agent})" do
it "type for podcast agents user agents is PODCAST (#{podcast_agent})" do
SmsLogparser::LogMessage.get_type(podcast_agent).must_equal "PODCAST"
end
end
Expand Down
32 changes: 18 additions & 14 deletions spec/parser_spec.rb
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
require 'spec_helper'

describe SmsLogparser::Parser do
describe @parser do

before do
@parser = SmsLogparser::Parser.new
end

it "count index.m3u8 with status 200 and user agent iPhone as mobile visit" do
message = '- - [22/Apr/2014:17:44:17 +0200] "GET /content/51/52/42701/index.m3u8 HTTP/1.1" 200 319009 "-" "AppleCoreMedia/1.0.0.11D167 (iPhone; U; CPU OS 7_1 like Mac OS X; de_de)"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data[1][:customer_id].must_equal "51"
data[1][:author_id].must_equal "52"
data[1][:project_id].must_equal "42701"
Expand All @@ -14,7 +18,7 @@

it "count *.flv with status 200 and user agent Android as mobile visit" do
message = ' - - [22/Apr/2014:17:44:27 +0200] "GET /content/51/52/42709/simvid_1_40.flv HTTP/1.1" 200 9625900 "http://blick.simplex.tv/NubesPlayer/index.html?cID=51&aID=52&pID=42709&autostart=false&themeColor=d6081c&embed=1&configUrl=http://f.blick.ch/resources/61786/ver1-0/js/xtendxIframeStatsSmartphone.js?adtechID=3522740&language=de&quality=40&hideHD=true&progressiveDownload=true" "Mozilla/5.0 (Linux; Android 4.4.2; C6903 Build/14.3.A.0.757) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data[1][:customer_id].must_equal "51"
data[1][:author_id].must_equal "52"
data[1][:project_id].must_equal "42709"
Expand All @@ -24,7 +28,7 @@

it "count *.mp4 with status 200 and user agent Android as mobile visit" do
message = '- - [22/Apr/2014:17:44:21 +0200] "GET /content/51/52/42701/simvid_1.mp4 HTTP/1.1" 200 2644715 "-" "Samsung GT-I9505 stagefright/1.2 (Linux;Android 4.4.2)"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data[1][:customer_id].must_equal "51"
data[1][:author_id].must_equal "52"
data[1][:project_id].must_equal "42701"
Expand All @@ -34,7 +38,7 @@

it "count *.flv with status 200 and user agent Firefox on Windows as webcast visit" do
message = '- - [22/Apr/2014:18:00:50 +0200] "GET /content/51/52/42431/simvid_1_40.flv HTTP/1.1" 200 6742274 "http://blick.simplex.tv/NubesPlayer/player.swf" "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data[1][:customer_id].must_equal "51"
data[1][:author_id].must_equal "52"
data[1][:project_id].must_equal "42431"
Expand All @@ -44,7 +48,7 @@

it "count traffic with status 206 and a argumenst string and user agent Firefox on Windows as webcast visit" do
message = '- - [23/Apr/2014:17:36:33 +0200] "GET /content/51/52/42721/simvid_1_40.flv?position=22 HTTP/1.1" 206 100708 "http://blick.simplex.tv/NubesPlayer/player.swf" "Mozilla/5.0 (Windows NT 6.1; rv:28.0) Gecko/20100101 Firefox/28.0"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data.first[:customer_id].must_equal "51"
data.first[:author_id].must_equal "52"
data.first[:project_id].must_equal "42721"
Expand All @@ -54,7 +58,7 @@

it "count traffic with status 200 and no file from bot as webcast visit" do
message = '- - [23/Apr/2014:17:47:32 +0200] "GET /content/51/52/42624/ HTTP/1.1" 200 1181 "-" "Googlebot-Video/1.0"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data.size.must_equal 1
data.first[:customer_id].must_equal "51"
data.first[:author_id].must_equal "52"
Expand All @@ -65,43 +69,43 @@

it "do not count *.css with status 200 as visit" do
message = '- - [22/Apr/2014:18:00:50 +0200] "GET /content/51/52/42431/application.css HTTP/1.1" 200 19299999 "http://blick.simplex.tv/NubesPlayer/player.swf" "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data.size.must_equal 1
end

it "do not count status 206 as visit" do
message = '- - [22/Apr/2014:18:00:50 +0200] "GET /content/51/52/42431/simvid_1_40.flv HTTP/1.1" 206 19299999 "http://blick.simplex.tv/NubesPlayer/player.swf" "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data.size.must_equal 1
end

it "count visit with no args" do
message = '- - [22/Apr/2014:18:00:50 +0200] "GET /content/51/52/42431/simvid_1_40.flv HTTP/1.1" 200 19299999 "http://blick.simplex.tv/NubesPlayer/player.swf" "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data.size.must_equal 2
end

it "count visit with position=1" do
message = '- - [22/Apr/2014:18:00:50 +0200] "GET /content/51/52/42431/simvid_1_40.flv?position=0 HTTP/1.1" 200 19299999 "http://blick.simplex.tv/NubesPlayer/player.swf" "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data.size.must_equal 2
end

it "count visit with position=1" do
message = '- - [22/Apr/2014:18:00:50 +0200] "GET /content/51/52/42431/simvid_1_40.flv?position=1 HTTP/1.1" 200 19299999 "http://blick.simplex.tv/NubesPlayer/player.swf" "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data.size.must_equal 2
end

it "do not count visit with args position=2 or greater" do
message = '- - [22/Apr/2014:18:00:50 +0200] "GET /content/51/52/42431/simvid_1_40.flv?position=2 HTTP/1.1" 200 19299999 "http://blick.simplex.tv/NubesPlayer/player.swf" "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data.size.must_equal 1
end

it "do not count visit when bytes < 256 * 1024" do
message = '- - [22/Apr/2014:18:00:50 +0200] "GET /content/51/52/42431/simvid_1_40.flv HTTP/1.1" 200 200000 "http://blick.simplex.tv/NubesPlayer/player.swf" "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko"'
data = SmsLogparser::Parser.extract_data_from_msg(message)
data = @parser.extract_data_from_msg(message)
data.size.must_equal 1
end

Expand Down

0 comments on commit 5b1d146

Please sign in to comment.