Permalink
Browse files

first version should be done...

  • Loading branch information...
1 parent 268b65c commit 35bbad9d6cef503f9db249402687de4fcb4ce2fc @bmuller bmuller committed Jan 19, 2011
View
@@ -0,0 +1,3 @@
+docs
+lib
+pkg
View
674 LICENSE
Oops, something went wrong.
View
No changes.
View
@@ -0,0 +1,73 @@
+= ganapati -- Hadoop HDFS Thrift interface for Ruby
+
+A Ruby thrift lib for interfacing with Hadoop's distributed file system, HDFS.
+
+To install:
+ gem install ganapati
+
+== Starting thrift server
+Documentation in Hadoop for the thrift interface to HDFS is crap. It can be found here[http://wiki.apache.org/hadoop/HDFS-APIs].
+
+As a much simpler and safer way of auto compiling and then starting the thrift interface, use the provided script:
+ bin/hdfs_thrift_server <port>
+
+This will start a thrift server on the given port (after compiling the server code provided in the Hadoop distribution).
+
+== Basic Usage
+ require 'rubygems'
+ require 'ganapati'
+
+ # args are host, port, and optional timeout
+ client = Ganapati::Client.new 'localhost', 1234
+
+ # copy a file to hdfs
+ client.put("/some/file", "/some/hadoop/path")
+
+ # get a file from hadoop
+ client.get("/some/hadoop/path", "/local/path")
+
+ # Create a file
+ f = client.create("/home/someuser/afile.txt")
+ f.write("this is some text")
+ # Always, always close the file
+ f.close
+
+ # Create a file with code block
+ client.create("/home/someuser/afile.txt") { |f|
+ f.write("this is some text")
+ }
+
+ # Open a file for reading and read it
+ c.open('/home/someuser/afile.txt') { |f|
+ puts f.read
+ # or read for specific length from start
+ puts f.read(0, 4)
+ }
+
+ # Open a file for appending and append to it
+ c.append('/home/someuser/afile.txt') { |f|
+ f.write "new data"
+ }
+
+ ## Common file methods are available (chown, chmod, mkdir, stat, etc). Examples:
+ # move a file
+ client.mv "/home/someuser/afile.txt", "/home/someuser/test.txt"
+
+ # remove a file
+ client.rm "/home/someuser/test.txt"
+
+ # test for file existance
+ client.exists? "/home/someuser/test.txt"
+
+ # get a list of all files
+ client.ls "/home"
+
+ client.close
+
+ # Quick and dirty way to print remote file. The run class method takes care of closing the client.
+ puts Ganapati::Client.run('localhost', 1234) { |c| c.open('/home/someuser/afile.txt') { |f| f.read } }
+
+
+
+
+
View
@@ -0,0 +1,48 @@
+require 'rubygems'
+require 'rake'
+require 'rake/testtask'
+require 'rake/rdoctask'
+require 'rake/gempackagetask'
+
+desc "Create documentation"
+Rake::RDocTask.new("doc") { |rdoc|
+ rdoc.title = "Ganapati - Hadoop HDFS thrift interface for Ruby"
+ rdoc.rdoc_dir = 'docs'
+ rdoc.rdoc_files.include('README.rdoc')
+ rdoc.rdoc_files.include('lib/**/*.rb')
+}
+
+desc "Re-generate thrift files"
+task "regen_thrift" do
+ if ENV['HADOOP_HOME'].nil?
+ puts "You must set your HADOOP_HOME variable before calling this task."
+ return
+ end
+ system "thrift --gen rb -o /tmp #{ENV['HADOOP_HOME']}src/contrib/thriftfs/if/hadoopfs.thrift"
+ system "mv /tmp/gen-rb/* lib/thrift"
+end
+
+spec = Gem::Specification.new do |s|
+ s.name = "ganapati"
+ s.version = "0.0.1"
+ s.authors = ["Brian Muller"]
+ s.date = %q{2011-01-19}
+ s.description = "Hadoop HDFS Thrift interface for Ruby"
+ s.summary = "Simple lib for interfaceing with Hadoop's distributed file system HDFS."
+ s.email = "brian.muller@livingsocial.com"
+ s.files = FileList["lib/**/*"]
+ s.homepage = "https://github.com/livingsocial/ganapati"
+ s.require_paths = ["lib"]
+ s.bindir = "bin"
+ s.executables << 'hdfs_thrift_server'
+ s.rubygems_version = "1.3.5"
+ s.add_dependency('thrift', '>= 0.5.0')
+end
+
+Rake::GemPackageTask.new(spec) do |pkg|
+ pkg.need_zip = true
+ pkg.need_tar = true
+end
+
+desc "Default task: builds gem"
+task :default => [ :gem, :doc ]
View
@@ -0,0 +1,20 @@
+#!/bin/bash
+JARFILE=$HADOOP_HOME/lib/HadoopThriftServer.jar
+CONTRIBLIB=$HADOOP_HOME/src/contrib/thriftfs/lib
+CLASSPATH=$(ls $HADOOP_HOME/hadoop-core-*.jar):$(ls $HADOOP_HOME/lib/commons-logging-api-*.jar):$CONTRIBLIB/hadoopthriftapi.jar:$CONTRIBLIB/libthrift.jar:$HADOOP_HOME/conf
+
+if [ "$1" == "" ]; then
+ echo "Usage: $0 <port>"
+ exit 1
+fi
+
+if [ ! -e "$JARFILE" ]; then
+ echo "Creating thrift server jar file at $JARFILE"
+ mkdir -p /tmp/HadoopThriftServer
+ javac -classpath $CLASSPATH -d /tmp/HadoopThriftServer/ $CONTRIBLIB/../src/java/org/apache/hadoop/thriftfs/HadoopThriftServer.java
+ jar -cf $JARFILE -C /tmp/HadoopThriftServer/ .
+else
+ echo "Found jar file at $JARFILE"
+fi
+
+java -Dcom.sun.management.jmxremote -cp $JARFILE:$CLASSPATH org.apache.hadoop.thriftfs.HadoopThriftServer $1
View
@@ -0,0 +1,7 @@
+require 'ganapati/client'
+require 'ganapati/hfile'
+
+$:.unshift File.join(File.dirname(__FILE__), 'thrift')
+require 'thrift_hadoop_file_system'
+require 'hadoopfs_constants'
+
View
@@ -0,0 +1,124 @@
+module Ganapati
+
+ class Client
+ def initialize(server, port, timeout=60)
+ socket = Thrift::Socket.new(server, port)
+ @transport = Thrift::BufferedTransport.new(socket)
+ @transport.open
+ protocol = Thrift::BinaryProtocol.new(@transport)
+ @client = ThriftHadoopFileSystem::Client.new(protocol)
+ @client.setInactivityTimeoutPeriod(timeout)
+ end
+
+ def close
+ @transport.close
+ end
+
+ # shutdown the thrift server
+ def shutdown(status=0)
+ @client.shutdown status
+ end
+
+ # copy local file to remote
+ def put(localpath, destpath)
+ create(destpath) { |dest|
+ Kernel.open(localpath) { |source|
+ # read 1 MB at a time
+ while record = source.read(1048576)
+ dest.write(record)
+ end
+ }
+ }
+ end
+
+ # copy remote file to local
+ def get(remotepath, destpath)
+ Kernel.open(destpath, 'w') { |dest|
+ open(remotepath) { |source|
+ size = source.length
+ index = 0
+ while index < size
+ dest.write(source.read(index, 1048576))
+ index += 1048576
+ end
+ }
+ }
+ end
+
+ # for writing to a new file
+ def create(path, &block)
+ file_handle :create, path, &block
+ end
+
+ # for reading
+ def open(path, &block)
+ file_handle :open, path, &block
+ end
+
+ # for appending
+ def append(path, &block)
+ file_handle :append, path, &block
+ end
+
+ def rm(path, recursive=false)
+ @client.rm pname(path), recursive
+ end
+
+ def mv(source, dest)
+ @client.rename pname(source), pname(dest)
+ end
+
+ def mkdir(path)
+ @client.mkdirs pname(path)
+ end
+
+ def exists?(path)
+ @client.exists pname(path)
+ end
+
+ def stat(path)
+ @client.stat pname(path)
+ end
+
+ def ls(path, details=false)
+ statuses = @client.listStatus pname(path)
+ (details) ? statuses : statuses.map { |s| s.path }
+ end
+
+ def chmod(path, mode)
+ @client.chmod pname(path), mode
+ end
+
+ def chown(path, owner, group)
+ @client.chown pname(path), owner, group
+ end
+
+ def set_replication(path, level)
+ @client.setReplication pname(path), level
+ end
+
+ def self.run(server, port)
+ c = Client.new(server, port)
+ result = yield c
+ c.close
+ result
+ end
+
+ private
+ def file_handle(action, path)
+ pathname = pname(path)
+ fh = @client.send action, pathname
+ result = f = HFile.new(@client, fh, pathname)
+ if block_given?
+ result = yield f
+ f.close
+ end
+ result
+ end
+
+ def pname(path)
+ Pathname.new(:pathname => path.to_s)
+ end
+ end
+
+end
View
@@ -0,0 +1,37 @@
+module Ganapati
+
+ class HFile
+ def initialize(client, handle, pathname)
+ @client = client
+ @handle = handle
+ @pathname = pathname
+ end
+
+ def write(data)
+ call :write, data
+ end
+
+ def read(offset=0, size=nil)
+ size ||= stat.length
+ call :read, offset, size
+ end
+
+ def close
+ call :close
+ end
+
+ def stat
+ @client.stat(@pathname)
+ end
+
+ def length
+ stat.length
+ end
+
+ private
+ def call(method, *args)
+ @client.send method, @handle, *args
+ end
+ end
+
+end
@@ -0,0 +1,8 @@
+#
+# Autogenerated by Thrift
+#
+# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+#
+
+require 'hadoopfs_types'
+
Oops, something went wrong.

0 comments on commit 35bbad9

Please sign in to comment.