Permalink
Browse files

fixed dataset parameter issues, completely eliminated secret key

  • Loading branch information...
1 parent a446178 commit 1bf6fa569d3edff29f65c89af55e0d36551499f6 @rdulepet committed Mar 28, 2010
Showing with 31 additions and 34 deletions.
  1. +3 −24 global.rb
  2. +5 −10 job.rb
  3. +23 −0 s3_upload.rb
View
@@ -15,14 +15,6 @@ class Global
@@results_dir = nil
# CONSTANTS
- # parse xml find S3 location and store results in S3
- S3_OPTIONS = { 'x-amz-acl' => 'public-read' } # For now all is public
- # Keys for the main CRData Amazon account - read from env!
- AWS_ACCESS_KEY = 'AKIAJZ5KSZXV2N4XIKNA'
- AWS_SECRET_KEY = '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'
- MAIN_BUCKET = 'crdataapp'
- MAIN_BUCKET_URL = 'http://crdataapp.s3.amazonaws.com/'
-
SUCCESSFUL_JOB = 'Successful Job'
FAILED_JOB = 'Failed Job'
RETURN_STATUS = 'FAILED JOB, PLEASE CHECK LOG'
@@ -61,18 +53,6 @@ def self.set_results_dir
@@results_dir = (FileUtils.pwd + "/" + TEMP_DIR) unless @@results_dir
end
- # Helper to return an interface to S3
- def self.s3if
- # A trck to control the RightAWS logging
- $VERBOSE = nil if @verbose == 0 # Totally silence ruby if we're in silent mode. Useful for cron scripts
-
- s3_opts = {:multi_thread => true, :logger => nil}
-
- $S3 ||= RightAws::S3Interface.new(Global::AWS_ACCESS_KEY, Global::AWS_SECRET_KEY, s3_opts)
-
- $S3
- end
-
# Returns a random lowercase hexadecimal string exactly +l+ digits long,
# left-padded with zeros when the drawn number is short.
def self.rand_hex_3(l)
  upper_bound = 1 << (l * 4) # one hex digit carries four bits
  rand(upper_bound).to_s(16).rjust(l, '0')
end
@@ -81,17 +61,16 @@ def self.rand_uuid
[8,4,4,4,12].map {|n| rand_hex_3(n)}.join('-')
end
-
# Ensures every directory in +names+ exists.
#
# names - one or more directory paths.
#
# FileUtils.mkdir_p is used instead of a File.directory? check followed by
# mkdir: it is a no-op for directories that already exist, creates missing
# parent directories, and does not fail when another process creates the
# directory between the check and the mkdir call.
def self.create_if_missing_directory(*names)
  names.each { |name| FileUtils.mkdir_p(name) }
end
end
class String
- def clean_s3_url
- self.gsub(Global::MAIN_BUCKET_URL,'')
- end
# Returns everything after the final '/' in the string.
# When the string contains no '/', the whole string is returned instead of
# failing on the nil that rindex yields.
def last_part
  self[(rindex('/') || -1) + 1..-1]
end
# Returns the final path segment of a URL-like string with any query string
# removed, e.g. "http://h/b/data.csv?Signature=x" gives "data.csv".
# A string without '/' is returned whole (minus its query string).
def last_part_without_params
  # Cut the query first: a signature value may itself contain '/', which
  # would otherwise be mistaken for the last path separator.
  path = sub(/\?.*\z/m, '')
  path[(path.rindex('/') || -1) + 1..-1]
end
end
View
15 job.rb
@@ -15,11 +15,11 @@
require 's3_upload'
class Job
- JOB_FIELDS = %w[name value kind data_set_s3_file]
+ JOB_FIELDS = %w[name value kind data_set_url]
PARAM_NAME = "name"
PARAM_VALUE = "value"
PARAM_KIND = "kind"
- PARAM_DATA_SET = "data_set_s3_file"
+ PARAM_DATA_SET = "data_set_url"
VALUE_DATA_SET = "Dataset"
VALUE_INTEGER = "Integer"
VALUE_BOOLEAN = "Boolean"
@@ -87,21 +87,16 @@ def fetch_params
job_params = {}
JOB_FIELDS.each do |el|
- job_params[el] = CGI::unescapeHTML(param.at(el).innerHTML)
+ job_params[el] = CGI::unescapeHTML(CGI::unescape(param.at(el).innerHTML))
end
if job_params[PARAM_KIND] == VALUE_DATA_SET
- just_name = job_params[PARAM_DATA_SET].to_s.last_part
+ just_name = job_params[PARAM_DATA_SET].to_s.last_part_without_params
#@r_call_interface.assign(job_params[PARAM_NAME], just_name)
r_script_inc_file_handle.puts "#{job_params[PARAM_NAME]} = \"#{just_name}\""
Global.logger.info("R_PARAMETER::#{job_params[PARAM_NAME]} = #{just_name}")
- data_file_handle = File.new("#{Global.results_dir}/#{@curr_uuid}/#{just_name}", 'wb')
- # stream file in chunks especially makes more sense for larger files
- rhdr = Global.s3if.get(Global::MAIN_BUCKET, job_params[PARAM_DATA_SET].to_s.clean_s3_url) do |chunk|
- data_file_handle.write chunk
- end
- data_file_handle.close
+ fetch_data_file job_params[PARAM_DATA_SET], "#{Global.results_dir}/#{@curr_uuid}/#{just_name}"
elsif job_params[PARAM_KIND] == VALUE_STRING
#@r_call_interface.assign(job_params[PARAM_NAME], job_params[PARAM_VALUE].to_s)
r_script_inc_file_handle.puts "#{job_params[PARAM_NAME]} = \"#{job_params[PARAM_VALUE].to_s}\""
View
@@ -23,6 +23,29 @@ def upload_results_to_s3 (server_name, job_id, type, fname, fpath_name)
end
# Downloads the resource at +s3url+ over HTTP(S) and writes it to +dest_file+.
#
# The response body is streamed chunk by chunk, so a large data set is never
# held in memory in full.
#
# s3url     - URL of the data file; may carry a query string such as the
#             Signature/Expires parameters of a signed URL.
# dest_file - local path the body is written to (opened in binary mode).
#
# Returns nil.
# Raises the error produced by Net::HTTPResponse#value when the server does
# not answer with a 2xx status; in that case dest_file is not created.
def fetch_data_file(s3url, dest_file)
  url = URI.parse(s3url.to_s)
  http = Net::HTTP.new(url.host, url.port)
  http.use_ssl = (url.scheme == 'https')

  # request_uri keeps the query string; url.path alone would drop it and the
  # server would receive the request without its signature parameters.
  request = Net::HTTP::Get.new(url.request_uri)

  http.request(request) do |res|
    # Fail before touching the disk so an error page is never saved as data.
    res.value
    # Block form closes the handle even if streaming raises midway.
    File.open(dest_file, 'wb') do |data_file_handle|
      res.read_body { |chunk| data_file_handle.write(chunk) }
    end
  end
  nil
end
+
# Helper to generate REST requests, handle authentication and errors
def send_request(req_type, host, port, url, fields_hash, up_file, ssl)
res = false

0 comments on commit 1bf6fa5

Please sign in to comment.