fixed dataset parameter issues, completely eliminated secret key
rdulepet committed Mar 28, 2010
1 parent a446178 commit 1bf6fa5
Showing 3 changed files with 31 additions and 34 deletions.
global.rb: 3 additions & 24 deletions
@@ -15,14 +15,6 @@ class Global
   @@results_dir = nil
 
   # CONSTANTS
-  # parse xml find S3 location and store results in S3
-  S3_OPTIONS = { 'x-amz-acl' => 'public-read' } # For now all is public
-  # Keys for the main CRData Amazon account - read from env!
-  AWS_ACCESS_KEY = 'AKIAJZ5KSZXV2N4XIKNA'
-  AWS_SECRET_KEY = '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'
-  MAIN_BUCKET = 'crdataapp'
-  MAIN_BUCKET_URL = 'http://crdataapp.s3.amazonaws.com/'
-
   SUCCESSFUL_JOB = 'Successful Job'
   FAILED_JOB = 'Failed Job'
   RETURN_STATUS = 'FAILED JOB, PLEASE CHECK LOG'
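
The deleted comment notes the keys were meant to be "read from env!". For reference, a minimal sketch of that pattern, assuming conventional variable names; this commit takes the stronger route of deleting the constants outright and fetching datasets over plain HTTP instead:

# Hypothetical sketch only: supplying credentials via the environment rather
# than hardcoding them (not what this commit does; it removes them entirely).
AWS_ACCESS_KEY = ENV['AWS_ACCESS_KEY']
AWS_SECRET_KEY = ENV['AWS_SECRET_KEY']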
@@ -61,18 +53,6 @@ def self.set_results_dir
     @@results_dir = (FileUtils.pwd + "/" + TEMP_DIR) unless @@results_dir
   end
 
-  # Helper to return an interface to S3
-  def self.s3if
-    # A trck to control the RightAWS logging
-    $VERBOSE = nil if @verbose == 0 # Totally silence ruby if we're in silent mode. Useful for cron scripts
-
-    s3_opts = {:multi_thread => true, :logger => nil}
-
-    $S3 ||= RightAws::S3Interface.new(Global::AWS_ACCESS_KEY, Global::AWS_SECRET_KEY, s3_opts)
-
-    $S3
-  end
-
   def self.rand_hex_3(l)
     "%0#{l}x" % rand(1 << l*4)
   end
@@ -81,17 +61,16 @@ def self.rand_uuid
     [8,4,4,4,12].map {|n| rand_hex_3(n)}.join('-')
   end
 
-
   def self.create_if_missing_directory *names
     names.each do |name| FileUtils.mkdir(name) unless File.directory?(name) end
   end
 end
 
 class String
-  def clean_s3_url
-    self.gsub(Global::MAIN_BUCKET_URL,'')
-  end
   def last_part
     self[self.rindex('/')+1..-1]
   end
+  def last_part_without_params
+    self[self.rindex('/')+1..-1].gsub /\?Signature.*/, ''
+  end
 end
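
The new String#last_part_without_params is needed because dataset URLs can now carry a query string (e.g. a pre-signed S3 URL), which must not leak into the local filename. A quick illustration with a hypothetical URL; note the gsub assumes Signature is the first query parameter:

# Hypothetical pre-signed URL; only the bucket host comes from this codebase.
url = "http://crdataapp.s3.amazonaws.com/datasets/census.csv?Signature=abc123&Expires=1269700000"
url.last_part                 # => "census.csv?Signature=abc123&Expires=1269700000"
url.last_part_without_params  # => "census.csv"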
job.rb: 5 additions & 10 deletions
@@ -15,11 +15,11 @@
 require 's3_upload'
 
 class Job
-  JOB_FIELDS = %w[name value kind data_set_s3_file]
+  JOB_FIELDS = %w[name value kind data_set_url]
   PARAM_NAME = "name"
   PARAM_VALUE = "value"
   PARAM_KIND = "kind"
-  PARAM_DATA_SET = "data_set_s3_file"
+  PARAM_DATA_SET = "data_set_url"
   VALUE_DATA_SET = "Dataset"
   VALUE_INTEGER = "Integer"
   VALUE_BOOLEAN = "Boolean"
@@ -87,21 +87,16 @@ def fetch_params
       job_params = {}
 
       JOB_FIELDS.each do |el|
-        job_params[el] = CGI::unescapeHTML(param.at(el).innerHTML)
+        job_params[el] = CGI::unescapeHTML(CGI::unescape(param.at(el).innerHTML))
       end
 
       if job_params[PARAM_KIND] == VALUE_DATA_SET
-        just_name = job_params[PARAM_DATA_SET].to_s.last_part
+        just_name = job_params[PARAM_DATA_SET].to_s.last_part_without_params
         #@r_call_interface.assign(job_params[PARAM_NAME], just_name)
         r_script_inc_file_handle.puts "#{job_params[PARAM_NAME]} = \"#{just_name}\""
         Global.logger.info("R_PARAMETER::#{job_params[PARAM_NAME]} = #{just_name}")
 
-        data_file_handle = File.new("#{Global.results_dir}/#{@curr_uuid}/#{just_name}", 'wb')
-        # stream file in chunks especially makes more sense for larger files
-        rhdr = Global.s3if.get(Global::MAIN_BUCKET, job_params[PARAM_DATA_SET].to_s.clean_s3_url) do |chunk|
-          data_file_handle.write chunk
-        end
-        data_file_handle.close
+        fetch_data_file job_params[PARAM_DATA_SET], "#{Global.results_dir}/#{@curr_uuid}/#{just_name}"
       elsif job_params[PARAM_KIND] == VALUE_STRING
         #@r_call_interface.assign(job_params[PARAM_NAME], job_params[PARAM_VALUE].to_s)
         r_script_inc_file_handle.puts "#{job_params[PARAM_NAME]} = \"#{job_params[PARAM_VALUE].to_s}\""
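
Two changes here: parameter values are now URL-decoded in addition to HTML-unescaped, and the dataset download goes through the new fetch_data_file helper with the full URL instead of the authenticated RightAws call. A small sketch of why both decodes are needed, using a hypothetical value as it might arrive in the job XML:

require 'cgi'

# Hypothetical raw value: URL-encoded, then HTML-escaped inside the XML.
raw = "http%3A//crdataapp.s3.amazonaws.com/data.csv%3FSignature%3Dabc&amp;Expires%3D123"
CGI::unescapeHTML(CGI::unescape(raw))
# => "http://crdataapp.s3.amazonaws.com/data.csv?Signature=abc&Expires=123"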
s3_upload.rb: 23 additions & 0 deletions
@@ -23,6 +23,29 @@ def upload_results_to_s3 (server_name, job_id, type, fname, fpath_name)
 
 end
 
+def fetch_data_file(s3url, dest_file)
+  url = URI.parse s3url
+  http = Net::HTTP.new(url.host, url.port)
+  http.use_ssl = (url.scheme == 'https')
+
+  puts url.path
+
+  # save output
+  data_file_handle = File.new(dest_file, 'wb')
+
+  request = Net::HTTP::Get.new(url.path)
+  # stream the file in chunks (efficient for larger files)
+  http.request(request) do |res|
+    size, total = 0, res.header['Content-Length'].to_i
+    res.read_body do |chunk|
+      size += chunk.size
+      #puts "%d%% done (%d of %d)" % [(size * 100) / total, size, total]
+      data_file_handle.write chunk
+    end
+  end
+  data_file_handle.close
+end
+
 # Helper to generate REST requests, handle authentication and errors
 def send_request(req_type, host, port, url, fields_hash, up_file, ssl)
   res = false
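
A minimal usage sketch of the new helper, with hypothetical paths. One caveat: Net::HTTP::Get.new(url.path) sends only the path, so any query string on a pre-signed URL is silently dropped; url.request_uri (path plus query) would preserve it:

require 'net/http'
require 'uri'

# Hypothetical arguments; fetch_data_file is the helper added above.
fetch_data_file('http://crdataapp.s3.amazonaws.com/datasets/census.csv',
                '/tmp/crdata_results/some-uuid/census.csv')

# For a pre-signed URL the request line would need the query string too:
#   request = Net::HTTP::Get.new(url.request_uri)   # path + query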
