# Example: upload a local directory to Manta, run a word-count job over it,
# print the result, and clean up.
require_relative 'lib/ruby-manta'
# You'll need to provide these four environment variables to run this
# example. E.g.:
# MANTA_USER=john MANTA_KEY=~/.ssh/john MANTA_URL=https://us-east.manta.joyent.com LOCAL_DIR=. ruby example.rb
# Read the four required settings from the environment, in the order
# MANTA_URL, MANTA_USER, MANTA_KEY, LOCAL_DIR. A variable that is unset or
# set to an empty string aborts the script with a message naming it (an
# empty string is truthy in Ruby, so a plain `unless value` check would let
# it through).
settings = %w[MANTA_URL MANTA_USER MANTA_KEY LOCAL_DIR].map do |name|
  value = ENV[name]
  raise "You must specify #{name}" if value.nil? || value.empty?
  value
end
host, user, priv_key, upload_dir = settings
# Read in the private key and create a MantaClient instance. MantaClient is
# thread-safe and provides persistent connections with pooling, so you'll
# only ever need a single instance of this in a program.
#
# The key path is expanded before reading so that a leading '~' still works
# when the shell did not expand it (for example when the value was quoted).
priv_key_data = File.read(File.expand_path(priv_key))

# NOTE(review): :disable_ssl_verification => true is kept from the original
# example; judging by the option name it turns off TLS certificate checks,
# which is not something to copy into production code -- confirm and remove
# if the endpoint has a valid certificate.
client = RubyManta::MantaClient.new(host, user, priv_key_data,
                                    :disable_ssl_verification => true,
                                    # :subuser => 'monte',
                                   )
# Set up a directory in Manta that is used only by this example run. It
# lives under the user's /stor area.
dir_path = "/#{user}/stor/ruby-manta-example"
client.put_directory(dir_path)
# Push every regular file found directly inside the local directory into
# the Manta directory, using each file's base name as the object name.
# Anything that is not a regular file (e.g. a subdirectory) is skipped.
Dir["#{upload_dir}/*"].each do |local_path|
  next unless File.file?(local_path)

  # Be careful about binary files and file encodings in Ruby 1.9: read in
  # binary mode ('rb', which forces ASCII-8BIT). Without it, expect timeouts
  # while PUTing an object.
  contents = File.open(local_path, 'rb', &:read)
  client.put_object("#{dir_path}/#{File.basename(local_path)}", contents)
end
# Job definition: a two-phase word count.
# The first phase (no :type given) runs the wc UNIX command on every object;
# the reduce phase then uses awk to sum up the three numbers (lines, words,
# characters) that each wc invocation returned.
map_phase = { :exec => 'wc' }
reduce_phase = {
  :type => 'reduce',
  :exec => "awk '{ l += $1; w += $2; c += $3 } END { print l, w, c }'"
}
job_details = {
  :name   => 'total word count',
  :phases => [map_phase, reduce_phase]
}
# Start the job, then hand it every object currently listed in the Manta
# directory as input. Directory entries whose type is not 'object' are
# ignored.
job_path, _ = client.create_job(job_details)
listing, _ = client.list_directory(dir_path)
objects = listing.select { |entry| entry['type'] == 'object' }
obj_paths = objects.map { |entry| dir_path + '/' + entry['name'] }
client.add_job_keys(job_path, obj_paths)
# Tell Manta we're done adding objects to the job. Manta doesn't need this
# to start running a job -- you can see map results without it, for
# example -- but reduce phases in particular depend on all mapping
# finishing.
client.end_job_input(job_path)

# Poll once per second until Manta reports the job's state as 'done'.
# Kernel#loop with an explicit break replaces the `begin ... end while`
# post-test form; the order (sleep, fetch, check) is unchanged.
loop do
  sleep 1
  job, _ = client.get_job(job_path)
  break if job['state'] == 'done'
end
# Fetch and display the job's result. In this case we expect only one
# result (the job ends in a single reduce phase), so only the first output
# path is used.
results, _ = client.get_job_output(job_path)
output_path = Array(results).first

if output_path
  data, _ = client.get_object(output_path)
  puts data
else
  # No output was listed (presumably because the job's tasks failed -- TODO
  # confirm). Report it instead of passing nil to get_object, so the
  # clean-up that follows still runs.
  warn 'Job finished without producing any output'
end
# Tidy up: delete each object that was given to the job, then remove the
# example directory itself.
obj_paths.each { |obj_path| client.delete_object(obj_path) }
client.delete_directory(dir_path)