Skip to content
Browse files

added command line utilities and bumped version

  • Loading branch information...
1 parent e3521e1 commit 1d11910d8b5e904bf052401bfbd2d918ea958269 @bmuller bmuller committed
Showing with 150 additions and 27 deletions.
  1. +9 −1 README.rdoc
  2. +8 −24 Rakefile
  3. +64 −0 bin/hcp
  4. +42 −0 bin/hls
  5. +22 −0 ganapati.gemspec
  6. +1 −0 lib/ganapati.rb
  7. +4 −2 lib/ganapati/client.rb
View
10 README.rdoc
@@ -1,6 +1,6 @@
= ganapati -- Hadoop HDFS Thrift interface for Ruby
-A Ruby thrift lib for interfacing with Hadoop's distributed file system, HDFS.
+Ganapati is a Ruby thrift lib for interfacing with Hadoop's distributed file system, HDFS. It also includes a few command line client utilities.
To install:
gem install ganapati
@@ -72,6 +72,14 @@ This will start a thrift server on the given port (after compiling the server co
# Quick and dirty way to print remote file. The run class method takes care of closing the client.
puts Ganapati::Client.run('localhost', 1234) { |c| c.open('/home/someuser/afile.txt') { |f| f.read } }
+== Command Line Utilities
+There are a few utility programs included in the bin directory. *hls* provides a way to see the contents of HDFS (recursively and verbosely with appropriate command line options):
+ ./bin/hls hdfs://host:port/tmp
+
+*hcp* provides a way to copy to/from/between HDFS servers:
+ ./bin/hcp hdfs://host:port/some/path/to/file ./file
+ ./bin/hcp ./file hdfs://host:port/some/path/to/file
+ ./bin/hcp hdfs://anotherhost:port/some/path/to/file hdfs://host:port/some/path/to/file
View
32 Rakefile
@@ -1,8 +1,9 @@
require 'rubygems'
-require 'rake'
+require 'bundler'
require 'rake/testtask'
require 'rake/rdoctask'
-require 'rake/gempackagetask'
+
+Bundler::GemHelper.install_tasks
desc "Create documentation"
Rake::RDocTask.new("doc") { |rdoc|
@@ -22,27 +23,10 @@ task "regen_thrift" do
system "mv /tmp/gen-rb/* lib/thrift"
end
-spec = Gem::Specification.new do |s|
- s.name = "ganapati"
- s.version = "0.0.4"
- s.authors = ["Brian Muller"]
- s.date = %q{2011-02-10}
- s.description = "Hadoop HDFS Thrift interface for Ruby"
- s.summary = "Simple lib for interfaceing with Hadoop's distributed file system HDFS."
- s.email = "brian.muller@livingsocial.com"
- s.files = FileList["lib/**/*"]
- s.homepage = "https://github.com/livingsocial/ganapati"
- s.require_paths = ["lib"]
- s.bindir = "bin"
- s.executables << 'hdfs_thrift_server'
- s.rubygems_version = "1.3.5"
- s.add_dependency('thrift', '>= 0.5.0')
-end
+task :default => [ :gem, :doc ]
+
+
+
+
-Rake::GemPackageTask.new(spec) do |pkg|
- pkg.need_zip = true
- pkg.need_tar = true
-end
-desc "Default task: builds gem"
-task :default => [ :gem, :doc ]
View
64 bin/hcp
@@ -0,0 +1,64 @@
+#!/usr/bin/env ruby
+require 'optparse'
+require 'rubygems'
+require 'ganapati'
+
# Command line handling for hcp.
#
# Builds the option parser, consumes recognised switches from ARGV and then
# checks that exactly two positional arguments (source and dest) remain.
# Any usage problem prints the usage text and exits with status 1.
#
# NOTE(review): options[:verbose] is recorded here but nothing in this part
# of the script reads it yet -- confirm whether verbose output is planned.
options = { :recursive => false, :verbose => false }
op = OptionParser.new do |opts|
  opts.banner = "Usage: hcp [options] source dest"

  opts.on('-h', '--help', 'displays usage information') do
    puts opts
    exit
  end

  opts.on('-v', '--verbose', 'verbose') do
    options[:verbose] = true
  end

  opts.on('-r', '--recursive', 'copy recursively') do
    # Recursive copies are not implemented; refuse rather than silently
    # copying a single file. Once supported this handler should simply set
    # options[:recursive] = true.
    puts "Recursive option not supported yet."
    exit 1
  end
end

begin
  op.parse!
rescue OptionParser::ParseError => e
  # An unknown or malformed switch would otherwise abort with a raw
  # backtrace; report it the same way as any other usage error.
  puts e.message
  puts op.to_s
  exit 1
end

# After parse! only positional arguments are left in ARGV.
if ARGV.length != 2
  puts op.to_s
  exit 1
end
+
# Local-filesystem stand-in exposing the three methods hcp calls on either
# end of a copy: create, readlines and close.
class LocalFile
  # Opens +location+ for writing (truncating any existing file) and yields
  # the open File to the block.
  #
  # The block form of File.open guarantees the handle is closed even when
  # the block raises. File.open is used instead of Kernel#open so a path
  # beginning with "|" is treated as a file name and never run as a command.
  #
  # Returns nil.
  def create(location)
    File.open(location, 'w') { |f| yield f }
    nil
  end

  # Yields each line of the file at +location+ (line terminator included),
  # in order. The handle is closed when iteration ends or the block raises.
  #
  # Returns nil.
  def readlines(location)
    File.open(location, 'r') do |f|
      f.each_line { |line| yield line }
    end
    nil
  end

  # No-op: every handle is already closed by the method that opened it.
  # Present so callers can invoke close unconditionally.
  def close
  end
end
+
# Copy driver for hcp: ARGV[0] is the source, ARGV[1] the destination.
#
# Each argument is wrapped in a Ganapati::FileUrl. When the URL reports
# hdfs? the endpoint is a Ganapati::Client connected to the URL's host and
# port; otherwise it is a LocalFile. Both kinds respond to create,
# readlines and close, so the copy itself does not care which is which.
#
# NOTE(review): the :file argument to FileUrl.new presumably selects the
# default scheme for bare paths -- confirm against Ganapati::FileUrl.
spath = Ganapati::FileUrl.new ARGV[0], :file
dpath = Ganapati::FileUrl.new ARGV[1], :file

source = nil
dest = nil
begin
  source = spath.hdfs? ? Ganapati::Client.new(spath.host, spath.port) : LocalFile.new
  dest = dpath.hdfs? ? Ganapati::Client.new(dpath.host, dpath.port) : LocalFile.new

  # Stream the source into the destination one line at a time.
  dest.create(dpath.path) do |f|
    source.readlines(spath.path) { |line| f.write(line) }
  end
ensure
  # Release both endpoints even if connecting or copying failed part-way.
  source.close if source
  dest.close if dest
end
View
42 bin/hls
@@ -0,0 +1,42 @@
+#!/usr/bin/env ruby
+require 'optparse'
+require 'rubygems'
+require 'ganapati'
+
# Attributes printed for every entry, in column order, when -v is given.
VERBOSE_PROPS = [:path, :length, :isdir, :modification_time, :permission, :owner, :group].freeze

# Command line handling for hls: parse switches, then require exactly one
# positional argument (the location to list). Usage problems print the
# usage text and exit with status 1.
options = { :verbose => false, :recursive => false }
op = OptionParser.new do |opts|
  opts.banner = "Usage: hls [options] host:port/location"

  opts.on('-h', '--help', 'displays usage information') do
    puts opts
    exit
  end

  opts.on('-r', '--recursive', 'list recursively') do
    options[:recursive] = true
  end

  opts.on('-v', '--verbose', 'verbose output') do
    # Only record the flag here. The column header is printed once the
    # listing has been fetched, so it never precedes a usage error and is
    # not repeated when -v is passed more than once.
    options[:verbose] = true
  end
end

begin
  op.parse!
rescue OptionParser::ParseError => e
  # Report unknown or malformed switches as a usage error instead of
  # letting the exception surface as a backtrace.
  puts e.message
  puts op.to_s
  exit 1
end

if ARGV.length != 1
  puts op.to_s
  exit 1
end

url = Ganapati::FileUrl.new ARGV.first
client = Ganapati::Client.new url.host, url.port
begin
  entries = client.ls(url.path, options[:verbose], options[:recursive])

  # Tab-separated header naming each verbose column.
  puts VERBOSE_PROPS.map { |a| a.to_s }.join("\t") if options[:verbose]

  entries.each do |entry|
    if options[:verbose]
      # In verbose mode each entry responds to every name in VERBOSE_PROPS.
      puts VERBOSE_PROPS.map { |prop| entry.send prop }.join("\t")
    else
      puts entry
    end
  end
ensure
  # Always release the connection, including when ls raises.
  client.close
end
View
22 ganapati.gemspec
@@ -0,0 +1,22 @@
+$:.push File.expand_path("../lib", __FILE__)
+require "ganapati/version"
+require "rake"
+
# Gem specification for ganapati.
#
# The version is read from Ganapati::VERSION (loaded via ganapati/version)
# so it is defined in one place. FileList is provided by rake, which this
# file requires before the specification is built.
Gem::Specification.new do |s|
  s.name = "ganapati"
  s.version = Ganapati::VERSION
  s.authors = ["Brian Muller"]
  # NOTE(review): fixed release date; it is not derived from the version,
  # so confirm it is updated (or dropped) when cutting a new release.
  s.date = %q{2011-02-10}
  s.description = "Hadoop HDFS Thrift interface for Ruby"
  s.summary = "Simple lib for interfacing with Hadoop's distributed file system HDFS."
  s.email = "brian.muller@livingsocial.com"
  # Library code, top-level capitalised files (README etc.), the Rakefile
  # and any docs.
  s.files = FileList["lib/**/*", "[A-Z]*", "Rakefile", "docs/**/*"]
  s.homepage = "https://github.com/livingsocial/ganapati"
  s.require_paths = ["lib"]
  s.bindir = "bin"
  # Scripts installed from bindir: the thrift server launcher plus the
  # hls and hcp command line utilities.
  s.executables = ['hdfs_thrift_server', 'hls', 'hcp']
  s.rubyforge_project = "ganapati"
  s.add_dependency('thrift', '>= 0.5.0')
end
View
1 lib/ganapati.rb
@@ -1,5 +1,6 @@
require 'ganapati/client'
require 'ganapati/hfile'
+require 'ganapati/utils'
$:.unshift File.join(File.dirname(__FILE__), 'thrift')
require 'thrift_hadoop_file_system'
View
6 lib/ganapati/client.rb
@@ -103,9 +103,11 @@ def stat(path)
@client.stat pname(path)
end
- def ls(path, details=false)
+ def ls(path, details=false, recursive=false)
statuses = @client.listStatus pname(path)
- (details) ? statuses : statuses.map { |s| s.path }
+ paths = (details) ? statuses : statuses.map { |s| s.path }
+ return paths if not recursive
+ paths + statuses.select { |s| s.isdir }.map { |s| ls(s.path, details, recursive) }.flatten
end
def chmod(path, mode)

0 comments on commit 1d11910

Please sign in to comment.
Something went wrong with that request. Please try again.