fixed dataset parameter issues, completely eliminated secret key
rdulepet committed Mar 28, 2010
1 parent a446178 commit 1bf6fa5
Showing 3 changed files with 31 additions and 34 deletions.
global.rb: 3 additions & 24 deletions
@@ -15,14 +15,6 @@ class Global
   @@results_dir = nil
 
   # CONSTANTS
-  # parse xml find S3 location and store results in S3
-  S3_OPTIONS = { 'x-amz-acl' => 'public-read' } # For now all is public
-  # Keys for the main CRData Amazon account - read from env!
-  AWS_ACCESS_KEY = 'AKIAJZ5KSZXV2N4XIKNA'
-  AWS_SECRET_KEY = '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'
-  MAIN_BUCKET = 'crdataapp'
-  MAIN_BUCKET_URL = 'http://crdataapp.s3.amazonaws.com/'
-
   SUCCESSFUL_JOB = 'Successful Job'
   FAILED_JOB = 'Failed Job'
   RETURN_STATUS = 'FAILED JOB, PLEASE CHECK LOG'
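
The deleted comment notes the keys were meant to be "read from env!". For reference, a minimal sketch of that pattern, assuming conventional variable names; this commit takes the stronger route of deleting the constants outright and fetching datasets over plain HTTP instead:

# Hypothetical sketch only: supplying credentials via the environment rather
# than hardcoding them (not what this commit does; it removes them entirely).
AWS_ACCESS_KEY = ENV['AWS_ACCESS_KEY']
AWS_SECRET_KEY = ENV['AWS_SECRET_KEY']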
@@ -61,18 +53,6 @@ def self.set_results_dir
     @@results_dir = (FileUtils.pwd + "/" + TEMP_DIR) unless @@results_dir
   end
 
-  # Helper to return an interface to S3
-  def self.s3if
-    # A trck to control the RightAWS logging
-    $VERBOSE = nil if @verbose == 0 # Totally silence ruby if we're in silent mode. Useful for cron scripts
-
-    s3_opts = {:multi_thread => true, :logger => nil}
-
-    $S3 ||= RightAws::S3Interface.new(Global::AWS_ACCESS_KEY, Global::AWS_SECRET_KEY, s3_opts)
-
-    $S3
-  end
-
   def self.rand_hex_3(l)
     "%0#{l}x" % rand(1 << l*4)
   end
@@ -81,17 +61,16 @@ def self.rand_uuid
     [8,4,4,4,12].map {|n| rand_hex_3(n)}.join('-')
   end
 
-
   def self.create_if_missing_directory *names
     names.each do |name| FileUtils.mkdir(name) unless File.directory?(name) end
   end
 end
 
 class String
-  def clean_s3_url
-    self.gsub(Global::MAIN_BUCKET_URL,'')
-  end
   def last_part
     self[self.rindex('/')+1..-1]
   end
+  def last_part_without_params
+    self[self.rindex('/')+1..-1].gsub /\?Signature.*/, ''
+  end
 end
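
The new String#last_part_without_params is needed because dataset URLs can now carry a query string (e.g. a pre-signed S3 URL), which must not leak into the local filename. A quick illustration with a hypothetical URL; note the gsub assumes Signature is the first query parameter:

# Hypothetical pre-signed URL; only the bucket host comes from this codebase.
url = "http://crdataapp.s3.amazonaws.com/datasets/census.csv?Signature=abc123&Expires=1269700000"
url.last_part                 # => "census.csv?Signature=abc123&Expires=1269700000"
url.last_part_without_params  # => "census.csv"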
job.rb: 5 additions & 10 deletions
@@ -15,11 +15,11 @@
 require 's3_upload'
 
 class Job
-  JOB_FIELDS = %w[name value kind data_set_s3_file]
+  JOB_FIELDS = %w[name value kind data_set_url]
   PARAM_NAME = "name"
   PARAM_VALUE = "value"
   PARAM_KIND = "kind"
-  PARAM_DATA_SET = "data_set_s3_file"
+  PARAM_DATA_SET = "data_set_url"
   VALUE_DATA_SET = "Dataset"
   VALUE_INTEGER = "Integer"
   VALUE_BOOLEAN = "Boolean"
@@ -87,21 +87,16 @@ def fetch_params
       job_params = {}
 
       JOB_FIELDS.each do |el|
-        job_params[el] = CGI::unescapeHTML(param.at(el).innerHTML)
+        job_params[el] = CGI::unescapeHTML(CGI::unescape(param.at(el).innerHTML))
       end
 
       if job_params[PARAM_KIND] == VALUE_DATA_SET
-        just_name = job_params[PARAM_DATA_SET].to_s.last_part
+        just_name = job_params[PARAM_DATA_SET].to_s.last_part_without_params
         #@r_call_interface.assign(job_params[PARAM_NAME], just_name)
         r_script_inc_file_handle.puts "#{job_params[PARAM_NAME]} = \"#{just_name}\""
         Global.logger.info("R_PARAMETER::#{job_params[PARAM_NAME]} = #{just_name}")
 
-        data_file_handle = File.new("#{Global.results_dir}/#{@curr_uuid}/#{just_name}", 'wb')
-        # stream file in chunks especially makes more sense for larger files
-        rhdr = Global.s3if.get(Global::MAIN_BUCKET, job_params[PARAM_DATA_SET].to_s.clean_s3_url) do |chunk|
-          data_file_handle.write chunk
-        end
-        data_file_handle.close
+        fetch_data_file job_params[PARAM_DATA_SET], "#{Global.results_dir}/#{@curr_uuid}/#{just_name}"
       elsif job_params[PARAM_KIND] == VALUE_STRING
         #@r_call_interface.assign(job_params[PARAM_NAME], job_params[PARAM_VALUE].to_s)
         r_script_inc_file_handle.puts "#{job_params[PARAM_NAME]} = \"#{job_params[PARAM_VALUE].to_s}\""
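
Two changes here: parameter values are now URL-decoded in addition to HTML-unescaped, and the dataset download goes through the new fetch_data_file helper with the full URL instead of the authenticated RightAws call. A small sketch of why both decodes are needed, using a hypothetical value as it might arrive in the job XML:

require 'cgi'

# Hypothetical raw value: URL-encoded, then HTML-escaped inside the XML.
raw = "http%3A//crdataapp.s3.amazonaws.com/data.csv%3FSignature%3Dabc&amp;Expires%3D123"
CGI::unescapeHTML(CGI::unescape(raw))
# => "http://crdataapp.s3.amazonaws.com/data.csv?Signature=abc&Expires=123"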
s3_upload.rb: 23 additions & 0 deletions
@@ -23,6 +23,29 @@ def upload_results_to_s3 (server_name, job_id, type, fname, fpath_name)
 
 end
 
+def fetch_data_file(s3url, dest_file)
+  url = URI.parse s3url
+  http = Net::HTTP.new(url.host, url.port)
+  http.use_ssl = (url.scheme == 'https')
+
+  puts url.path
+
+  # save output
+  data_file_handle = File.new(dest_file, 'wb')
+
+  request = Net::HTTP::Get.new(url.path)
+  # stream the file in chunks (efficient for larger files)
+  http.request(request) do |res|
+    size, total = 0, res.header['Content-Length'].to_i
+    res.read_body do |chunk|
+      size += chunk.size
+      #puts "%d%% done (%d of %d)" % [(size * 100) / total, size, total]
+      data_file_handle.write chunk
+    end
+  end
+  data_file_handle.close
+end
+
 # Helper to generate REST requests, handle authentication and errors
 def send_request(req_type, host, port, url, fields_hash, up_file, ssl)
   res = false
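
A minimal usage sketch of the new helper, with hypothetical paths. One caveat: Net::HTTP::Get.new(url.path) sends only the path, so any query string on a pre-signed URL is silently dropped; url.request_uri (path plus query) would preserve it:

require 'net/http'
require 'uri'

# Hypothetical arguments; fetch_data_file is the helper added above.
fetch_data_file('http://crdataapp.s3.amazonaws.com/datasets/census.csv',
                '/tmp/crdata_results/some-uuid/census.csv')

# For a pre-signed URL the request line would need the query string too:
#   request = Net::HTTP::Get.new(url.request_uri)   # path + query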
