Skip to content

Commit

Permalink
Add simple download_summary task
Browse files Browse the repository at this point in the history
  • Loading branch information
nicksieger committed Apr 27, 2010
1 parent 249fd95 commit c6250ce
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 58 deletions.
4 changes: 3 additions & 1 deletion Gemfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# -*- ruby -*-

source "http://gems.github.com"
source "http://gems.rubyforge.org"
source "http://rubygems.org"
gem "mojombo-jekyll"
gem "rack", "1.0.0"
gem "aws-s3"
gem "request-log-analyzer"
24 changes: 21 additions & 3 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,23 @@ dependencies:
group:
- :default
version: ">= 0"
request-log-analyzer:
group:
- :default
version: ">= 0"
aws-s3:
group:
- :default
version: ">= 0"
specs:
- classifier:
version: 1.3.1
- maruku:
version: 0.6.0
- directory_watcher:
version: 1.3.1
version: 1.3.2
- mime-types:
version: "1.16"
- syntax:
version: 1.0.0
- rack:
Expand All @@ -23,15 +33,23 @@ specs:
version: 0.5.4
- stemmer:
version: 1.0.1
- request-log-analyzer:
version: 1.6.4
- RedCloth:
version: 4.2.3
- builder:
version: 2.1.2
- aws-s3:
version: 0.6.2
- xml-simple:
version: 1.0.12
- liquid:
version: 2.0.0
- open4:
version: 1.0.1
hash: 9540012571d55fcd100a2ff75fc952f55e334d15
hash: 9137291c2c9f497418b6efcfd3e3eb26cac2b242
sources:
- Rubygems:
uri: http://gems.github.com
- Rubygems:
uri: http://gems.rubyforge.org
uri: http://rubygems.org
68 changes: 14 additions & 54 deletions Rakefile
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# Bootstrap: try to load Bundler and call Bundler.setup before any task is
# defined. If the 'bundler' library cannot be required, print a hint and
# continue loading the Rakefile instead of aborting.
begin
require 'bundler'
Bundler.setup
rescue LoadError
# Only LoadError is handled here; any other error raised by Bundler.setup
# is not rescued by this block.
puts "Please install Bundler and run 'bundle install' to ensure you have all dependencies"
end

desc "Clean the site"
task :clean do
rm_rf "_site"
Expand Down Expand Up @@ -27,15 +34,6 @@ task :default do
Rake.application.display_tasks_and_comments
end

# Parse the S3 bucket listing and memoize the resulting REXML document.
#
# stream - optional source (IO or String) holding the listing XML. When it is
#          omitted the listing is read from 's3manifest.xml' in the current
#          directory.
#
# The parsed document is cached in @doc, so later calls return the cached
# document and ignore +stream+. The default file is opened only when a parse
# is actually needed and is closed as soon as parsing finishes; the previous
# default argument (File.new) opened a fresh, never-closed handle on every
# call and raised if the file was missing even when @doc was already cached.
#
# Returns the REXML::Document.
def manifest_xml(stream = nil)
  require 'rexml/document' # loaded lazily, only when the manifest is needed
  @doc ||= if stream
    REXML::Document.new(stream)
  else
    File.open('s3manifest.xml') { |io| REXML::Document.new(io) }
  end
end

# Collect the elements selected by the absolute XPath
# /ListBucketResult/Contents/Key from the document returned by manifest_xml.
#
# Returns whatever Array the root element's #elements.to_a yields for that path.
def manifest_entries
  key_xpath = '/ListBucketResult/Contents/Key'
  listing_root = manifest_xml.root
  listing_root.elements.to_a(key_xpath)
end

file 's3manifest.xml' do |t|
require 'open-uri'
open("http://jruby.org.s3.amazonaws.com/") do |xml|
Expand All @@ -45,59 +43,16 @@ end

desc "Create browsable index.html files for S3"
task :indexes => 's3manifest.xml' do
entries = manifest_entries.map do |el|
el.text.strip.sub(/_\$folder\$$/, '/')
end
dirs = {"." => []}
entries.sort.each do |f|
dirs[File.dirname(f)] ||= []
dirs[File.dirname(f)] << f
end
top = "www/files"
mkdir_p top, :verbose => false
dirs.each do |dir,entries|
sorted_manifest_directories.each do |dir,entries|
mkdir_p File.expand_path(File.join(top, dir)), :verbose => false
File.open(File.expand_path(File.join(top, dir, "index.html")), "wb") do |html|
html.puts <<HDR
---
layout: main
title: Files/#{dir == '.' ? '' : dir}
---
<h1>Files/#{dir == '.' ? '' : dir}</h1>
<p class="trackDownloads">
HDR
parent = File.dirname(dir)
parent = parent == '.' ? '' : "#{parent}/"
html.puts " <a href='/files/#{parent}index.html'>..</a><br/>" unless dir == '.'
entries.sort.each do |entry|
if entry =~ /\/$/
html.puts " <a href='/files/#{entry}index.html'>#{File.basename(entry)}</a><br/>"
else
html.puts " <a href='http://jruby.org.s3.amazonaws.com/#{entry}'>#{File.basename(entry)}</a><br/>"
end
end
html.puts "</p>"
write_index_html(html, dir, entries)
end
end
end

# Establish an S3 connection from the credentials file and return the
# 'jruby.org' bucket.
#
# Credentials are read from ~/.ey-cloud.yml; the values under the
# :aws_secret_id and :aws_secret_key keys are passed as the access key id and
# secret access key respectively.
# NOTE(review): the symbol keys assume the YAML file is written with
# symbol-style keys -- confirm against the actual file.
#
# Returns the result of AWS::S3::Bucket.find('jruby.org').
def jruby_org_bucket
  require 'aws/s3'
  require 'yaml' # do not rely on another library having loaded YAML already
  credentials_path = File.expand_path('~/.ey-cloud.yml')
  # File.open (not Kernel#open) since this is always a plain local file.
  ey_cloud = File.open(credentials_path) { |f| YAML::load(f) }
  AWS::S3::Base.establish_connection!(
    :access_key_id     => ey_cloud[:aws_secret_id],
    :secret_access_key => ey_cloud[:aws_secret_key]
  )
  AWS::S3::Bucket.find('jruby.org')
end

# Make sure +obj+ carries a READ grant for the "AllUsers" group.
#
# obj - object responding to #key and #acl; #acl with no argument returns the
#       policy (with #grants) and #acl(policy) is used to store it.
#
# Returns nil without changing anything when such a grant already exists;
# otherwise appends a :public_read grant, writes the policy back, and returns
# the result of that write.
def add_public_read_perm(obj)
  already_public = obj.acl.grants.any? do |grant|
    grant.grantee.group == "AllUsers" && grant.permission == "READ"
  end
  return if already_public

  puts "Updating #{obj.key} to be publicly readable"
  obj.acl.grants << AWS::S3::ACL::Grant.grant(:public_read)
  # Passing the policy back to #acl is what saves the permissions.
  obj.acl(obj.acl)
end

task :update_hash_files do
jruby_org_bucket.objects.each do |obj|
next unless obj.key =~ /\.(md5|sha1)$/
Expand All @@ -115,3 +70,8 @@ task :update_read_perms do
add_public_read_perm(obj)
end
end

# Rake task that prints a per-file download summary by delegating to
# jruby_download_summary.
desc "Print a summary of yesterday's file downloads"
task :download_summary do
# The DATE environment variable is forwarded as-is; when it is unset ENV
# returns nil and jruby_download_summary picks its own default date.
jruby_download_summary ENV['DATE']
end
111 changes: 111 additions & 0 deletions rakelib/s3.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Parse the S3 bucket listing and memoize the resulting REXML document.
#
# stream - optional source (IO or String) holding the listing XML. When it is
#          omitted the listing is read from 's3manifest.xml' in the current
#          directory.
#
# The parsed document is cached in @doc, so later calls return the cached
# document and ignore +stream+. The default file is opened only when a parse
# is actually needed and is closed as soon as parsing finishes; the previous
# default argument (File.new) opened a fresh, never-closed handle on every
# call and raised if the file was missing even when @doc was already cached.
#
# Returns the REXML::Document.
def manifest_xml(stream = nil)
  require 'rexml/document' # loaded lazily, only when the manifest is needed
  @doc ||= if stream
    REXML::Document.new(stream)
  else
    File.open('s3manifest.xml') { |io| REXML::Document.new(io) }
  end
end

# Collect the elements selected by the absolute XPath
# /ListBucketResult/Contents/Key from the document returned by manifest_xml.
#
# Returns whatever Array the root element's #elements.to_a yields for that path.
def manifest_entries
  key_xpath = '/ListBucketResult/Contents/Key'
  listing_root = manifest_xml.root
  listing_root.elements.to_a(key_xpath)
end

# Group the manifest keys by the directory that contains them.
#
# Each entry's text is stripped of surrounding whitespace and a trailing
# "_$folder$" suffix is rewritten to "/". The keys are then sorted and
# bucketed by File.dirname, so top-level keys land under ".".
#
# Returns a Hash of directory name => Array of keys (in sorted order). The
# "." key is always present, even when there are no entries.
def sorted_manifest_directories
  folder_suffix = /_\$folder\$$/
  keys = manifest_entries.map { |element| element.text.strip.sub(folder_suffix, '/') }
  keys.sort.each_with_object({ "." => [] }) do |key, by_dir|
    (by_dir[File.dirname(key)] ||= []) << key
  end
end

# Write the index page for one directory to +html+.
#
# html    - object responding to #puts that receives the page.
# dir     - directory name; '.' denotes the top level and is shown as an
#           empty path in the title and heading.
# entries - keys inside the directory. They are emitted in sorted order:
#           keys ending in '/' link to that directory's own index.html under
#           /files/, all other keys link to the S3 bucket URL.
#
# The page starts with a front-matter header (layout/title), and every
# directory except the top level gets a ".." link to its parent's index.
def write_index_html(html, dir, entries)
  shown_dir = dir == '.' ? '' : dir
  html.puts <<HDR
---
layout: main
title: Files/#{shown_dir}
---
<h1>Files/#{shown_dir}</h1>
<p class="trackDownloads">
HDR
  unless dir == '.'
    up = File.dirname(dir)
    up = up == '.' ? '' : "#{up}/"
    html.puts " <a href='/files/#{up}index.html'>..</a><br/>"
  end
  entries.sort.each do |entry|
    label = File.basename(entry)
    href = if entry =~ %r{/$}
      "/files/#{entry}index.html"
    else
      "http://jruby.org.s3.amazonaws.com/#{entry}"
    end
    html.puts " <a href='#{href}'>#{label}</a><br/>"
  end
  html.puts "</p>"
end

# Ensure an AWS::S3 connection is available.
#
# First asks AWS::S3::Base for its current connection; if that raises, a new
# connection is established from the credentials in ~/.ey-cloud.yml (values
# under the :aws_secret_id and :aws_secret_key keys).
# NOTE(review): the symbol keys assume the YAML file is written with
# symbol-style keys -- confirm against the actual file.
#
# Returns the existing connection, or the result of establish_connection!.
def s3_connect
  require 'aws/s3'
  require 'yaml' # do not rely on another library having loaded YAML already
  begin
    AWS::S3::Base.connection
  rescue
    # Any StandardError from the lookup is treated as "not connected yet".
    credentials_path = File.expand_path('~/.ey-cloud.yml')
    # File.open (not Kernel#open) since this is always a plain local file.
    ey_cloud = File.open(credentials_path) { |f| YAML::load(f) }
    AWS::S3::Base.establish_connection!(
      :access_key_id     => ey_cloud[:aws_secret_id],
      :secret_access_key => ey_cloud[:aws_secret_key])
  end
end

# Look up the 'jruby.org' bucket, calling s3_connect first.
#
# Returns the result of AWS::S3::Bucket.find for that bucket name.
def jruby_org_bucket
  s3_connect
  bucket_name = 'jruby.org'
  AWS::S3::Bucket.find(bucket_name)
end

# Make sure +obj+ carries a READ grant for the "AllUsers" group.
#
# obj - object responding to #key and #acl; #acl with no argument returns the
#       policy (with #grants) and #acl(policy) is used to store it.
#
# Returns nil without changing anything when such a grant already exists;
# otherwise appends a :public_read grant, writes the policy back, and returns
# the result of that write.
def add_public_read_perm(obj)
  already_public = obj.acl.grants.any? do |grant|
    grant.grantee.group == "AllUsers" && grant.permission == "READ"
  end
  return if already_public

  puts "Updating #{obj.key} to be publicly readable"
  obj.acl.grants << AWS::S3::ACL::Grant.grant(:public_read)
  # Passing the policy back to #acl is what saves the permissions.
  obj.acl(obj.acl)
end

# Match one access-log line against request-log-analyzer's Amazon S3 format.
#
# On first use this loads request_log_analyzer, creates the AmazonS3 file
# format, and keeps its request object (@req) and :access line definition
# (@line_def) for later calls. The line definition's regexp is patched once:
# the run of four numeric "(\d+)" fields is replaced by four
# "anything but a space" fields.
#
# line - a single log line.
#
# Returns whatever @line_def.match_for(line, @req) returns.
def log_line_match(line)
  unless @line_def
    require 'request_log_analyzer'
    require 'request_log_analyzer/file_format'
    require 'request_log_analyzer/file_format/amazon_s3'
    @format = RequestLogAnalyzer::FileFormat::AmazonS3.create
    @req = @format.request
    @line_def = @format.line_definitions[:access]
    # Bleh. R-L-A's S3 format is a little buggy, this fixes it
    numeric_fields = '(\\d+) (\\d+) (\\d+) (\\d+)'
    lenient_fields = '([^\\ ]+) ([^\\ ]+) ([^\\ ]+) ([^\\ ]+)'
    patched_source = @line_def.regexp.to_s.sub(numeric_fields, lenient_fields)
    @line_def.regexp = Regexp.new(patched_source)
  end
  @line_def.match_for(line, @req)
end

require 'date'
# Print a per-file count of successful archive downloads for one day.
#
# date - a Date or date String used (via string interpolation) as the tail of
#        the log-object prefix "jruby-access-log/<date>". When nil or empty it
#        defaults to yesterday; an empty string previously produced a prefix
#        that matched every log object.
#
# Log objects are listed from the 'jrubylogs' bucket. Every line is parsed
# with log_line_match; a line counts when it has a :key, its :http_status is
# 200 and the key ends in .zip, .exe or .tar.gz. The dot is matched
# literally -- the previous pattern used a bare "." and so also counted keys
# such as "foozip".
#
# Output goes to stdout: one "<key> <count>" line per file, sorted by key
# and padded to the longest key, then a "Total" line; or a single
# "No requests on <date>" line when nothing matched.
#
# Returns nil.
def jruby_download_summary(date = nil)
  date = Date.today - 1 if date.nil? || date.to_s.empty?
  s3_connect
  log_objects = AWS::S3::Bucket.objects('jrubylogs', :prefix => "jruby-access-log/#{date}")

  requests = Hash.new(0)
  log_objects.each do |log|
    log.value.each_line do |line|
      match_hash = log_line_match(line)
      next unless match_hash && match_hash[:key] && match_hash[:http_status] == 200
      file = match_hash[:key]
      requests[file] += 1 if file =~ /\.(zip|exe|tar\.gz)$/
    end
  end

  if requests.empty?
    puts "No requests on #{date}"
  else
    max_width = requests.keys.map { |key| key.length }.max
    requests.keys.sort.each do |key|
      puts "%-#{max_width}s %s" % [key, requests[key]]
    end
    total = requests.values.inject(0) { |sum, count| sum + count }
    puts "%-#{max_width}s %s" % ["Total", total]
  end
end

0 comments on commit c6250ce

Please sign in to comment.