Permalink
Browse files

Merge branch 'release/1.1.1'

* release/1.1.1:
  update README with release notes
  rename fix_partial_content to recalculate_partial_content
  use the default config.translate_options
  add translate_options
  add options to Ralf::ClfTranslator.new
  If request is '206 Partial Content' estimate the actual bytes when apparent bandwidth has exceeded 2Mbit/sec.
  Rename to .txt to show in TextMate (which ignores .log in my config).
  to save some space in the output limit the requester string to 10 chars instead of 64
  Extract the translator
  other changes to the .gemspec
  update versions of required gems and add autotest
  move version into a class related file
  use bundler
  • Loading branch information...
LeipeLeon committed Feb 11, 2013
2 parents 2fd5091 + 31d0029 commit 79988ce6a0ec152d47ef7314d07279cecd1a8722
View
2 .rspec
@@ -0,0 +1,2 @@
+--color
+--format progress
View
@@ -0,0 +1,3 @@
+source :rubygems
+gemspec
+gem 'rspec'
View
@@ -0,0 +1,38 @@
+PATH
+ remote: .
+ specs:
+ ralf (1.1.1)
+ chronic (~> 0.9.0)
+ logmerge (~> 1.0.3)
+ right_aws (~> 3.0.4)
+
+GEM
+ remote: http://rubygems.org/
+ specs:
+ ZenTest (4.8.4)
+ autotest (4.4.6)
+ ZenTest (>= 4.4.1)
+ chronic (0.9.0)
+ diff-lcs (1.1.3)
+ fakeweb (1.3.0)
+ logmerge (1.0.3)
+ right_aws (3.0.4)
+ right_http_connection (>= 1.2.5)
+ right_http_connection (1.3.0)
+ rspec (2.12.0)
+ rspec-core (~> 2.12.0)
+ rspec-expectations (~> 2.12.0)
+ rspec-mocks (~> 2.12.0)
+ rspec-core (2.12.2)
+ rspec-expectations (2.12.1)
+ diff-lcs (~> 1.1.3)
+ rspec-mocks (2.12.2)
+
+PLATFORMS
+ ruby
+
+DEPENDENCIES
+ autotest (~> 4.4.6)
+ fakeweb (~> 1.3.0)
+ ralf!
+ rspec
View
@@ -1,3 +1,15 @@
+Release 1.1.1 [2013-02-11 13:14]
+
+* Update gemspec
+* Extract the translator into its own class
+* Add an option to recalculate the bytes sent for '206 Partial Content' requests on S3
+ (see https://forums.aws.amazon.com/thread.jspa?threadID=54214 for more details)
+
+Release 1.1.0 [2011-05-08]
+
+* Switched to FileUtils for Ruby 1.9 compatibility
+
+
= Synopsis
Download, merge and convert Amazon S3 bucket log files for a specified date or date range.
View
@@ -1 +0,0 @@
-1.1.0
View
@@ -2,8 +2,11 @@
require 'right_aws'
require 'logmerge'
require 'fileutils'
+
+require 'ralf/version'
require 'ralf/config'
require 'ralf/bucket'
+require 'ralf/clf_translator'
require 'chronic'
require 'stringio'
require 'date'
@@ -18,9 +21,6 @@ class Ralf
ROOT_DEFAULT_CONFIG_FILE = '/etc/ralf.conf'
USER_DEFAULT_CONFIG_FILE = '~/.ralf.conf'
- AMAZON_LOG_FORMAT = Regexp.new('([^ ]*) ([^ ]*) \[([^\]]*)\] ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) "([^"]*)" ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) "([^"]*)" "([^"]*)"')
- AMAZON_LOG_FORMAT_COPY = Regexp.new('([^ ]*) ([^ ]*) \[([^\]]*)\] ([^ ]*) ([^ ]*) ([^ ]*) (REST.COPY.OBJECT_GET) ([^ ]*) (-) ([^ ]*) (-) (-) ([^ ]*) (-) (-) (-) (-) (-)')
-
# The current configuration.
attr_reader :config
@@ -90,7 +90,7 @@ def run(output_file = nil)
Ralf.merge(merged_log, log_files)
# convert to common log format
- Ralf.convert_to_common_log_format(merged_log, output_log)
+ Ralf.convert_to_common_log_format(merged_log, output_log, config.translate_options)
puts "#{log_files.size} files" if config.debug?
end
@@ -133,30 +133,18 @@ def self.merge(output_file, log_files)
end
# Convert the input_log file to Apache Common Log Format into output_log
- def self.convert_to_common_log_format(input_log, output_log)
+ def self.convert_to_common_log_format(input_log, output_log, options)
out_file = File.open(output_log, 'w')
File.open(input_log, 'r') do |in_file|
while (line = in_file.gets)
- if clf = translate_to_clf(line)
+ if clf = Ralf::ClfTranslator.new(line, options).to_s
out_file.puts(clf)
end
end
end
out_file.close
end
- def self.translate_to_clf(line)
- if line =~ AMAZON_LOG_FORMAT
- # host, date, ip, acl, request, status, bytes, agent, total_time_ms = $2, $3, $4, $5, $9, $10, $12, $17, $14
- "%s - %s [%s] \"%s\" %s %s \"%s\" \"%s\" %d" % [$4, $5, $3, $9, $10, $12, $16, $17, ($14.to_i/1000.0).round]
- elsif line =~ AMAZON_LOG_FORMAT_COPY
- "%s - %s [%s] \"%s\" %s %s \"%s\" \"REST.COPY.OBJECT_GET\" %d" % [$4, $5, $3, "POST /#{$8} HTTP/1.1", $10, $12, $16, 0]
- else
- $stderr.puts "# ERROR: #{line}"
- nil
- end
- end
-
def load_config(cli_config_file)
result = nil
if cli_config_file
View
@@ -0,0 +1,97 @@
class Ralf

  # Translates one Amazon S3 server access log line into a line in an
  # Apache Common Log Format (CLF) style layout.
  #
  # The line is parsed once, in the constructor. #to_s then returns the
  # translated line, or nil when the input matched neither known layout
  # (in which case the offending line has been reported on $stderr).
  #
  # Options:
  #   :recalculate_partial_content => false (default)
  #     If the request is '206 Partial Content', estimate the actual bytes
  #     sent when the apparent bandwidth has exceeded 2 Mbit/sec.
  #     S3 caches content to edge servers with a burst which never reaches
  #     the client.
  class ClfTranslator

    AMAZON_LOG_FORMAT      = Regexp.new('([^ ]*) ([^ ]*) \[([^\]]*)\] ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) "([^"]*)" ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*) "([^"]*)" "([^"]*)"').freeze
    AMAZON_LOG_COPY_FORMAT = Regexp.new('([^ ]*) ([^ ]*) \[([^\]]*)\] ([^ ]*) ([^ ]*) ([^ ]*) (REST.COPY.OBJECT_GET) ([^ ]*) (-) ([^ ]*) (-) (-) ([^ ]*) (-) (-) (-) (-) (-)').freeze

    # Apparent rate, in bits per millisecond (= kbit/sec), above which a
    # 206 response is considered an edge-cache burst: 2000 kbit/sec = 2 Mbit/sec.
    PARTIAL_CONTENT_MAX_BITS_PER_MS = 2000
    # Estimate for a burst response: a 128 K buffer ...
    PARTIAL_CONTENT_BUFFER_BYTES = 128 * 1024
    # ... plus 3 bytes/msec = 3 kbytes/sec = 24 kbit/sec.
    PARTIAL_CONTENT_BYTES_PER_MS = 3

    attr_reader :line
    attr_reader :owner, :bucket, :timestamp, :remote_ip, :request_id, :operation, :key, :request_uri, :http_status, :s3_error_code, :bytes_sent, :object_size, :total_time_in_ms, :turn_around_time_in_ms, :referrer, :user_agent, :request_version_id, :duration
    attr_reader :options

    # line    - one raw line from an S3 access log.
    # options - Hash of translation options (see the class comment); nil is
    #           treated as "no options".
    def initialize(line, options = {})
      @options = options || {}
      @line = line
      @translate_successful = translate
    end

    # Returns the translated line as a String, or nil when the input line
    # could not be parsed. Callers rely on the nil to skip bad lines.
    def to_s
      return nil unless @translate_successful

      "%s - %s [%s] \"%s\" %s %s \"%s\" \"%s\" %d" % [remote_ip, requester, timestamp, request_uri, http_status, bytes_sent, referrer, user_agent, duration]
    end

  private

    # To save space in the output the requester is limited to its first
    # 10 characters.
    def requester
      @requester[0..9]
    end

    # Parses #line and fills in the field readers.
    # Returns true when the line matched one of the known layouts, false
    # otherwise (after reporting the line on $stderr).
    def translate
      if (match = AMAZON_LOG_FORMAT.match(line))
        assign_fields(match)
        recalculate_partial_content if options[:recalculate_partial_content]
        @duration = (total_time_in_ms.to_i / 1000.0).round
        true
      elsif (match = AMAZON_LOG_COPY_FORMAT.match(line))
        assign_fields(match)
        # Copy records carry no request line or user agent: synthesize a
        # request from the key and report the operation as the user agent.
        @user_agent = @operation
        @duration = 0
        @request_uri = "POST /#{@key} HTTP/1.1"
        true
      else
        $stderr.puts "# ERROR: #{line}"
        false
      end
    end

    # Copies the regexp captures into the field instance variables, in log
    # column order. AMAZON_LOG_FORMAT has only 17 groups, so
    # @request_version_id stays nil for lines matched by it.
    def assign_fields(match)
      @owner, @bucket, @timestamp, @remote_ip, @requester, @request_id,
        @operation, @key, @request_uri, @http_status, @s3_error_code,
        @bytes_sent, @object_size, @total_time_in_ms, @turn_around_time_in_ms,
        @referrer, @user_agent, @request_version_id = *match.captures
    end

    # Replaces @bytes_sent with an estimate when a '206 Partial Content'
    # response shows an apparent bandwidth above 2 Mbit/sec. The estimate
    # never exceeds the logged value.
    def recalculate_partial_content
      return unless 206 == http_status.to_i

      elapsed_ms = total_time_in_ms.to_i
      # A total time of "0" or "-" gives no bandwidth to judge by (and used
      # to raise ZeroDivisionError); leave the logged value untouched.
      return if elapsed_ms.zero?

      logged_bytes = bytes_sent.to_i
      return unless (logged_bytes * 8) / elapsed_ms > PARTIAL_CONTENT_MAX_BITS_PER_MS

      estimate = PARTIAL_CONTENT_BUFFER_BYTES + PARTIAL_CONTENT_BYTES_PER_MS * elapsed_ms
      @bytes_sent = [estimate, logged_bytes].min
    end

  end

end
+
+# convert the format as specified in http://docs.aws.amazon.com/AmazonS3/latest/dev/LogFormat.html
+#
+# 1 Bucket Owner
+# The canonical user id of the owner of the source bucket.
+# 2 Bucket
+# The name of the bucket that the request was processed against. If the system receives a malformed request and cannot determine the bucket, the request will not appear in any server access log.
+# 3 Time
+# The time at which the request was received. The format, using strftime() terminology, is [%d/%b/%Y:%H:%M:%S %z]
+# 4 Remote IP
+# The apparent Internet address of the requester. Intermediate proxies and firewalls might obscure the actual address of the machine making the request.
+# 5 Requester
+# The canonical user id of the requester, or the string "Anonymous" for unauthenticated requests. This identifier is the same one used for access control purposes.
+# 6 Request ID
+# The request ID is a string generated by Amazon S3 to uniquely identify each request.
+# 7 Operation
+# Either SOAP.operation, REST.HTTP_method.resource_type or WEBSITE.HTTP_method.resource_type
+# 8 Key
+# The "key" part of the request, URL encoded, or "-" if the operation does not take a key parameter.
+# 9 Request-URI
+# The Request-URI part of the HTTP request message.
+# 10 HTTP status
+# The numeric HTTP status code of the response.
+# 11 Error Code
+# The Amazon S3 Error Code, or "-" if no error occurred.
+# 12 Bytes Sent
+# The number of response bytes sent, excluding HTTP protocol overhead, or "-" if zero.
+# 13 Object Size
+# The total size of the object in question.
+# 14 Total Time
+# The number of milliseconds the request was in flight from the server's perspective. This value is measured from the time your request is received to the time that the last byte of the response is sent. Measurements made from the client's perspective might be longer due to network latency.
+# 15 Turn-Around Time
+# The number of milliseconds that Amazon S3 spent processing your request. This value is measured from the time the last byte of your request was received until the time the first byte of the response was sent.
+# 16 Referrer
+# The value of the HTTP Referrer header, if present. HTTP user-agents (e.g. browsers) typically set this header to the URL of the linking or embedding page when making a request.
+# 17 User-Agent
+# The value of the HTTP User-Agent header.
+# 18 Version Id
+# The version ID in the request, or "-" if the operation does not take a versionId parameter.
View
@@ -22,6 +22,7 @@ class RangeError < StandardError ; end
:cache_dir # reader interpolates format
attr_reader :errors
+ attr_reader :translate_options
protected
@@ -40,6 +41,7 @@ def initialize(options = {})
@options[:now] ||= nil
@options[:range] ||= 'today'
@options[:cache_dir] ||= (0 == Process.uid ? ROOT_DEFAULT_CACHE_DIR : File.expand_path(USER_DEFAULT_CACHE_DIR))
+ @options[:translate_options] ||= {}
assign_options(@options)
end
@@ -102,6 +104,10 @@ def range=(args)
@range = range
end
+ def translate_options=(opts)
+ @translate_options = {:recalculate_partial_content => false}.merge(opts)
+ end
+
def output_file(variables)
Ralf::Interpolation.interpolate(@output_file, variables)
end
View
@@ -0,0 +1,3 @@
+class Ralf #:nodoc:
+ VERSION = "1.1.1"
+end
View
@@ -1,77 +1,38 @@
-# Generated by jeweler
-# DO NOT EDIT THIS FILE DIRECTLY
-# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
# -*- encoding: utf-8 -*-
+$:.push File.expand_path("../lib", __FILE__)
+require "ralf/version"
Gem::Specification.new do |s|
s.name = %q{ralf}
- s.version = "1.1.0"
+ s.version = Ralf::VERSION
- s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
- s.authors = ["Klaas Jan Wierenga", "Leon Berenschot"]
- s.date = %q{2011-05-09}
+ s.platform = Gem::Platform::RUBY
+ s.authors = ["Klaas Jan Wierenga", "Leon Berenschot"]
+ s.email = ["k.j.wierenga@gmail.com", "leonb@beriedata.nl"]
+ s.homepage = %q{http://github.com/kjwierenga/ralf}
+ s.summary = %q{Retrieve Amazon Log Files}
+ s.description = %q{ Download logfiles from Amazon S3 buckets to local disk and combine them in one Apache CLF per bucket }
+
+ s.files = `git ls-files`.split("\n")
+ s.test_files = `git ls-files -- {spec,features}/*`.split("\n")
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
+ s.require_paths = ["lib"]
+
+ s.date = %q{2013-02-06}
s.default_executable = %q{ralf}
- s.description = %q{ Download logfiles from Amazon S3 buckets to local disk and combine them in one Apache CLF per bucket
-}
- s.email = ["k.j.wierenga@gmail.com", "leonb@beriedata.nl"]
- s.executables = ["ralf"]
s.extra_rdoc_files = [
"README.rdoc"
]
- s.files = [
- ".rvmrc",
- "README.rdoc",
- "Rakefile",
- "VERSION",
- "bin/ralf",
- "lib/ralf.rb",
- "lib/ralf/bucket.rb",
- "lib/ralf/config.rb",
- "lib/ralf/interpolation.rb",
- "lib/ralf/log.rb",
- "lib/ralf/option_parser.rb",
- "ralf.gemspec",
- "spec/fixtures/apache.log",
- "spec/fixtures/example_buckets.yaml",
- "spec/ralf/bucket_spec.rb",
- "spec/ralf/config_spec.rb",
- "spec/ralf/interpolation_spec.rb",
- "spec/ralf/log_spec.rb",
- "spec/ralf/option_parser_spec.rb",
- "spec/ralf_spec.rb",
- "spec/spec.opts",
- "spec/spec_helper.rb",
- "spec/support/fakeweb.rb"
- ]
- s.homepage = %q{http://github.com/kjwierenga/ralf}
- s.rdoc_options = ["--exclude", "."]
- s.require_paths = ["lib"]
- s.rubygems_version = %q{1.3.7}
- s.summary = %q{Retrieve Amazon Log Files}
- if s.respond_to? :specification_version then
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
- s.specification_version = 3
+ s.rdoc_options = ["--exclude", "."]
+
+ s.add_development_dependency "rspec", "~> 2"
+ s.add_development_dependency "autotest", '~> 4.4.6'
+ s.add_development_dependency "fakeweb", "~> 1.3.0"
+
+ s.add_runtime_dependency "right_aws", "~> 3.0.4"
+ s.add_runtime_dependency "logmerge", "~> 1.0.3"
+ s.add_runtime_dependency "chronic", "~> 0.9.0"
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
- s.add_development_dependency(%q<rspec>, ["~> 1.3.0"])
- s.add_development_dependency(%q<fakeweb>, ["~> 1.2.8"])
- s.add_runtime_dependency(%q<right_aws>, ["~> 1.10.0"])
- s.add_runtime_dependency(%q<logmerge>, ["~> 1.0.2"])
- s.add_runtime_dependency(%q<chronic>, [">= 0.2.3"])
- else
- s.add_dependency(%q<rspec>, ["~> 1.3.0"])
- s.add_dependency(%q<fakeweb>, ["~> 1.2.8"])
- s.add_dependency(%q<right_aws>, ["~> 1.10.0"])
- s.add_dependency(%q<logmerge>, ["~> 1.0.2"])
- s.add_dependency(%q<chronic>, [">= 0.2.3"])
- end
- else
- s.add_dependency(%q<rspec>, ["~> 1.3.0"])
- s.add_dependency(%q<fakeweb>, ["~> 1.2.8"])
- s.add_dependency(%q<right_aws>, ["~> 1.10.0"])
- s.add_dependency(%q<logmerge>, ["~> 1.0.2"])
- s.add_dependency(%q<chronic>, [">= 0.2.3"])
- end
end
File renamed without changes.
Oops, something went wrong.

0 comments on commit 79988ce

Please sign in to comment.