Skip to content

Commit

Permalink
Merge branch 'master' of github.com:mrflip/wukong
Browse files Browse the repository at this point in the history
  • Loading branch information
Philip (flip) Kromer committed Jan 27, 2012
2 parents a4d4e9c + ada8bac commit f71b8c0
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 52 deletions.
2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
2.0.1 2.0.2
1 change: 1 addition & 0 deletions lib/wukong.rb
Expand Up @@ -10,6 +10,7 @@ module Wukong
autoload :Streamer, 'wukong/streamer' autoload :Streamer, 'wukong/streamer'
autoload :Store, 'wukong/store' autoload :Store, 'wukong/store'
autoload :FilenamePattern, 'wukong/filename_pattern' autoload :FilenamePattern, 'wukong/filename_pattern'
autoload :Decorator, 'wukong/decorator'


def self.run mapper, reducer=nil, options={} def self.run mapper, reducer=nil, options={}
Wukong::Script.new(mapper, reducer, options).run Wukong::Script.new(mapper, reducer, options).run
Expand Down
28 changes: 28 additions & 0 deletions lib/wukong/decorator.rb
@@ -0,0 +1,28 @@
require 'java'

java_import 'com.cloudera.flume.core.Event'
java_import 'com.cloudera.flume.core.EventImpl'
java_import 'com.cloudera.flume.core.EventSinkDecorator'

module Wukong
  #
  # Flume event-sink decorator that runs the body of every incoming event
  # through a Wukong mapper and forwards each record the mapper yields as a
  # new event carrying the original event's metadata.
  #
  class Decorator < EventSinkDecorator

    # @param mapper  [Class] mapper class; it is instantiated here with no arguments
    # @param reducer [Object, nil] accepted but not used by this class
    # @param options [Hash] accepted but not used by this class
    def initialize(mapper, reducer=nil, options={})
      # NOTE(review): nil is handed to the Java superclass constructor;
      # presumably the wrapped sink is attached later by Flume -- confirm.
      super(nil)
      @mapper = mapper.new
    end

    # Turns the event body into a record, feeds it to the mapper, and appends
    # one tab-joined output event per record the mapper yields.
    #
    # @param e [Event] incoming event; its body is read as raw bytes
    def append(e)
      line   = String.from_java_bytes(e.getBody)
      record = @mapper.recordize(line.chomp)
      # A nil record would splat into zero arguments (and false into a single
      # bogus one), so the mapper would be called with garbage or raise.
      # Treat a falsy record as "nothing to process for this event".
      return unless record

      @mapper.process(*record) do |output|
        processed = output.to_flat.join("\t")
        # Only the body changes; every other field is copied from the source event.
        event = EventImpl.new(processed.to_java_bytes, e.getTimestamp, e.getPriority, e.getNanos, e.getHost, e.getAttrs)
        super(event)
      end
    end

    # Returns the decorator itself without doing any work.
    def run
      self
    end

  end
end
60 changes: 9 additions & 51 deletions wukong.gemspec
Expand Up @@ -4,21 +4,14 @@
# -*- encoding: utf-8 -*- # -*- encoding: utf-8 -*-


Gem::Specification.new do |s| Gem::Specification.new do |s|
s.name = %q{wukong} s.name = "wukong"
s.version = "2.0.1" s.version = "2.0.2"


s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version= s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
s.authors = ["Philip (flip) Kromer"] s.authors = ["Philip (flip) Kromer"]
s.date = %q{2011-07-01} s.date = "2011-11-04"
s.description = %q{ Treat your dataset like a: s.description = " Treat your dataset like a:\n\n * stream of lines when it's efficient to process by lines\n * stream of field arrays when it's efficient to deal directly with fields\n * stream of lightweight objects when it's efficient to deal with objects\n\n Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.\n"
s.email = "flip@infochimps.org"
* stream of lines when it's efficient to process by lines
* stream of field arrays when it's efficient to deal directly with fields
* stream of lightweight objects when it's efficient to deal with objects
Wukong is friends with Hadoop the elephant, Pig the query language, and the cat on your command line.
}
s.email = %q{flip@infochimps.org}
s.executables = ["hdp-du", "hdp-sync", "hdp-wc", "wu-lign", "wu-sum", "hdp-parts_to_keys.rb"] s.executables = ["hdp-du", "hdp-sync", "hdp-wc", "wu-lign", "wu-sum", "hdp-parts_to_keys.rb"]
s.extra_rdoc_files = [ s.extra_rdoc_files = [
"LICENSE.textile", "LICENSE.textile",
Expand Down Expand Up @@ -180,6 +173,7 @@ Gem::Specification.new do |s|
"lib/wukong/datatypes.rb", "lib/wukong/datatypes.rb",
"lib/wukong/datatypes/enum.rb", "lib/wukong/datatypes/enum.rb",
"lib/wukong/datatypes/fake_types.rb", "lib/wukong/datatypes/fake_types.rb",
"lib/wukong/decorator.rb",
"lib/wukong/encoding.rb", "lib/wukong/encoding.rb",
"lib/wukong/encoding/asciize.rb", "lib/wukong/encoding/asciize.rb",
"lib/wukong/extensions.rb", "lib/wukong/extensions.rb",
Expand Down Expand Up @@ -254,46 +248,10 @@ Gem::Specification.new do |s|
"spec/wukong/script_spec.rb", "spec/wukong/script_spec.rb",
"wukong.gemspec" "wukong.gemspec"
] ]
s.homepage = %q{http://mrflip.github.com/wukong} s.homepage = "http://mrflip.github.com/wukong"
s.require_paths = ["lib"] s.require_paths = ["lib"]
s.rubygems_version = %q{1.5.0} s.rubygems_version = "1.8.10"
s.summary = %q{Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use it, yet handles terabyte-scale computation with ease.} s.summary = "Hadoop Streaming for Ruby. Wukong makes Hadoop so easy a chimpanzee can use it, yet handles terabyte-scale computation with ease."
s.test_files = [
"examples/contrib/jeans/normalize.rb",
"examples/contrib/jeans/sizes.rb",
"examples/corpus/bucket_counter.rb",
"examples/corpus/dbpedia_abstract_to_sentences.rb",
"examples/corpus/sentence_bigrams.rb",
"examples/corpus/sentence_coocurrence.rb",
"examples/corpus/words_to_bigrams.rb",
"examples/emr/elastic_mapreduce_example.rb",
"examples/ignore_me/counting.rb",
"examples/ignore_me/grouper.rb",
"examples/network_graph/adjacency_list.rb",
"examples/network_graph/breadth_first_search.rb",
"examples/network_graph/gen_2paths.rb",
"examples/network_graph/gen_multi_edge.rb",
"examples/network_graph/gen_symmetric_links.rb",
"examples/pagerank/pagerank.rb",
"examples/pagerank/pagerank_initialize.rb",
"examples/sample_records.rb",
"examples/server_logs/apache_log_parser.rb",
"examples/server_logs/breadcrumbs.rb",
"examples/server_logs/logline.rb",
"examples/server_logs/nook.rb",
"examples/server_logs/nook/faraday_dummy_adapter.rb",
"examples/server_logs/user_agent.rb",
"examples/simple_word_count.rb",
"examples/size.rb",
"examples/stats/avg_value_frequency.rb",
"examples/stats/binning_percentile_estimator.rb",
"examples/stats/rank_and_bin.rb",
"examples/stupidly_simple_filter.rb",
"examples/word_count.rb",
"spec/spec_helper.rb",
"spec/wukong/encoding_spec.rb",
"spec/wukong/script_spec.rb"
]


if s.respond_to? :specification_version then if s.respond_to? :specification_version then
s.specification_version = 3 s.specification_version = 3
Expand Down

0 comments on commit f71b8c0

Please sign in to comment.