Skip to content

Commit

Permalink
first version should be done...
Browse files Browse the repository at this point in the history
  • Loading branch information
Brian Muller committed Jan 19, 2011
1 parent 268b65c commit 35bbad9
Show file tree
Hide file tree
Showing 12 changed files with 2,317 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
docs
lib
pkg
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

Empty file removed README.markdown
Empty file.
73 changes: 73 additions & 0 deletions README.rdoc
@@ -0,0 +1,73 @@
= ganapati -- Hadoop HDFS Thrift interface for Ruby

A Ruby thrift lib for interfacing with Hadoop's distributed file system, HDFS.

To install:
gem install ganapati

== Starting thrift server
Documentation in Hadoop for the thrift interface to HDFS is crap. It can be found here[http://wiki.apache.org/hadoop/HDFS-APIs].

As a much simpler and safer way of auto compiling and then starting the thrift interface, use the provided script:
bin/hdfs_thrift_server <port>

This will start a thrift server on the given port (after compiling the server code provided in the Hadoop distribution).

== Basic Usage
require 'rubygems'
require 'ganapati'

# args are host, port, and optional timeout
client = Ganapati::Client.new 'localhost', 1234

# copy a file to hdfs
client.put("/some/file", "/some/hadoop/path")

# get a file from hadoop
client.get("/some/hadoop/path", "/local/path")

# Create a file
f = client.create("/home/someuser/afile.txt")
f.write("this is some text")
# Always, always close the file
f.close

# Create a file with code block
client.create("/home/someuser/afile.txt") { |f|
f.write("this is some text")
}

# Open a file for reading and read it
c.open('/home/someuser/afile.txt') { |f|
puts f.read
# or read for specific length from start
puts f.read(0, 4)
}

# Open a file for appending and append to it
c.append('/home/someuser/afile.txt') { |f|
f.write "new data"
}

## Common file methods are available (chown, chmod, mkdir, stat, etc). Examples:
# move a file
client.mv "/home/someuser/afile.txt", "/home/someuser/test.txt"

# remove a file
client.rm "/home/someuser/test.txt"

# test for file existance
client.exists? "/home/someuser/test.txt"

# get a list of all files
client.ls "/home"

client.close

# Quick and dirty way to print remote file. The run class method takes care of closing the client.
puts Ganapati::Client.run('localhost', 1234) { |c| c.open('/home/someuser/afile.txt') { |f| f.read } }





48 changes: 48 additions & 0 deletions Rakefile
@@ -0,0 +1,48 @@
require 'rubygems'
require 'rake'
require 'rake/testtask'
require 'rake/rdoctask'
require 'rake/gempackagetask'

desc "Create documentation"
Rake::RDocTask.new("doc") { |rdoc|
rdoc.title = "Ganapati - Hadoop HDFS thrift interface for Ruby"
rdoc.rdoc_dir = 'docs'
rdoc.rdoc_files.include('README.rdoc')
rdoc.rdoc_files.include('lib/**/*.rb')
}

desc "Re-generate thrift files"
task "regen_thrift" do
if ENV['HADOOP_HOME'].nil?
puts "You must set your HADOOP_HOME variable before calling this task."
return
end
system "thrift --gen rb -o /tmp #{ENV['HADOOP_HOME']}src/contrib/thriftfs/if/hadoopfs.thrift"
system "mv /tmp/gen-rb/* lib/thrift"
end

spec = Gem::Specification.new do |s|
s.name = "ganapati"
s.version = "0.0.1"
s.authors = ["Brian Muller"]
s.date = %q{2011-01-19}
s.description = "Hadoop HDFS Thrift interface for Ruby"
s.summary = "Simple lib for interfaceing with Hadoop's distributed file system HDFS."
s.email = "brian.muller@livingsocial.com"
s.files = FileList["lib/**/*"]
s.homepage = "https://github.com/livingsocial/ganapati"
s.require_paths = ["lib"]
s.bindir = "bin"
s.executables << 'hdfs_thrift_server'
s.rubygems_version = "1.3.5"
s.add_dependency('thrift', '>= 0.5.0')
end

Rake::GemPackageTask.new(spec) do |pkg|
pkg.need_zip = true
pkg.need_tar = true
end

desc "Default task: builds gem"
task :default => [ :gem, :doc ]
20 changes: 20 additions & 0 deletions bin/hdfs_thrift_server
@@ -0,0 +1,20 @@
#!/bin/bash
JARFILE=$HADOOP_HOME/lib/HadoopThriftServer.jar
CONTRIBLIB=$HADOOP_HOME/src/contrib/thriftfs/lib
CLASSPATH=$(ls $HADOOP_HOME/hadoop-core-*.jar):$(ls $HADOOP_HOME/lib/commons-logging-api-*.jar):$CONTRIBLIB/hadoopthriftapi.jar:$CONTRIBLIB/libthrift.jar:$HADOOP_HOME/conf

if [ "$1" == "" ]; then
echo "Usage: $0 <port>"
exit 1
fi

if [ ! -e "$JARFILE" ]; then
echo "Creating thrift server jar file at $JARFILE"
mkdir -p /tmp/HadoopThriftServer
javac -classpath $CLASSPATH -d /tmp/HadoopThriftServer/ $CONTRIBLIB/../src/java/org/apache/hadoop/thriftfs/HadoopThriftServer.java
jar -cf $JARFILE -C /tmp/HadoopThriftServer/ .
else
echo "Found jar file at $JARFILE"
fi

java -Dcom.sun.management.jmxremote -cp $JARFILE:$CLASSPATH org.apache.hadoop.thriftfs.HadoopThriftServer $1
7 changes: 7 additions & 0 deletions lib/ganapati.rb
@@ -0,0 +1,7 @@
require 'ganapati/client'
require 'ganapati/hfile'

$:.unshift File.join(File.dirname(__FILE__), 'thrift')
require 'thrift_hadoop_file_system'
require 'hadoopfs_constants'

124 changes: 124 additions & 0 deletions lib/ganapati/client.rb
@@ -0,0 +1,124 @@
module Ganapati

class Client
def initialize(server, port, timeout=60)
socket = Thrift::Socket.new(server, port)
@transport = Thrift::BufferedTransport.new(socket)
@transport.open
protocol = Thrift::BinaryProtocol.new(@transport)
@client = ThriftHadoopFileSystem::Client.new(protocol)
@client.setInactivityTimeoutPeriod(timeout)
end

def close
@transport.close
end

# shutdown the thrift server
def shutdown(status=0)
@client.shutdown status
end

# copy local file to remote
def put(localpath, destpath)
create(destpath) { |dest|
Kernel.open(localpath) { |source|
# read 1 MB at a time
while record = source.read(1048576)
dest.write(record)
end
}
}
end

# copy remote file to local
def get(remotepath, destpath)
Kernel.open(destpath, 'w') { |dest|
open(remotepath) { |source|
size = source.length
index = 0
while index < size
dest.write(source.read(index, 1048576))
index += 1048576
end
}
}
end

# for writing to a new file
def create(path, &block)
file_handle :create, path, &block
end

# for reading
def open(path, &block)
file_handle :open, path, &block
end

# for appending
def append(path, &block)
file_handle :append, path, &block
end

def rm(path, recursive=false)
@client.rm pname(path), recursive
end

def mv(source, dest)
@client.rename pname(source), pname(dest)
end

def mkdir(path)
@client.mkdirs pname(path)
end

def exists?(path)
@client.exists pname(path)
end

def stat(path)
@client.stat pname(path)
end

def ls(path, details=false)
statuses = @client.listStatus pname(path)
(details) ? statuses : statuses.map { |s| s.path }
end

def chmod(path, mode)
@client.chmod pname(path), mode
end

def chown(path, owner, group)
@client.chown pname(path), owner, group
end

def set_replication(path, level)
@client.setReplication pname(path), level
end

def self.run(server, port)
c = Client.new(server, port)
result = yield c
c.close
result
end

private
def file_handle(action, path)
pathname = pname(path)
fh = @client.send action, pathname
result = f = HFile.new(@client, fh, pathname)
if block_given?
result = yield f
f.close
end
result
end

def pname(path)
Pathname.new(:pathname => path.to_s)
end
end

end
37 changes: 37 additions & 0 deletions lib/ganapati/hfile.rb
@@ -0,0 +1,37 @@
module Ganapati

class HFile
def initialize(client, handle, pathname)
@client = client
@handle = handle
@pathname = pathname
end

def write(data)
call :write, data
end

def read(offset=0, size=nil)
size ||= stat.length
call :read, offset, size
end

def close
call :close
end

def stat
@client.stat(@pathname)
end

def length
stat.length
end

private
def call(method, *args)
@client.send method, @handle, *args
end
end

end
8 changes: 8 additions & 0 deletions lib/thrift/hadoopfs_constants.rb
@@ -0,0 +1,8 @@
#
# Autogenerated by Thrift
#
# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
#

require 'hadoopfs_types'

0 comments on commit 35bbad9

Please sign in to comment.