Skip to content

Commit

Permalink
Fixed bug caused by the changed API of the varaha.text.TokenizeText UDF
Browse files Browse the repository at this point in the history
  • Loading branch information
rjurney committed Jan 8, 2014
1 parent 01bcb13 commit 0926a40
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions ch07/pig/topics.pig
Expand Up @@ -2,7 +2,7 @@
%default HOME `echo \$HOME/Software/`

/* Avro depends on json-simple; AvroStorage lives in piggybank until Pig 0.12, where AvroStorage and TrevniStorage become builtins */
REGISTER $HOME/pig/build/ivy/lib/Pig/avro-1.5.3.jar
REGISTER $HOME/pig/build/ivy/lib/Pig/avro-1.7.4.jar
REGISTER $HOME/pig/build/ivy/lib/Pig/json-simple-1.1.jar
REGISTER $HOME/pig/contrib/piggybank/java/piggybank.jar

Expand All @@ -12,7 +12,7 @@ DEFINE LENGTH org.apache.pig.piggybank.evaluation.string.LENGTH();
REGISTER $HOME/varaha/lib/*.jar /* Varaha has a good tokenizer */
REGISTER $HOME/varaha/target/varaha-1.0-SNAPSHOT.jar

DEFINE TokenizeText varaha.text.TokenizeText();
DEFINE TokenizeText varaha.text.TokenizeText('1', '1');

set default_parallel 20

Expand All @@ -25,6 +25,7 @@ import 'ntfidf.macro';

/* Load emails and trim unneeded fields */
emails = load '/me/Data/test_mbox' using AvroStorage();
-- emails = FILTER emails BY body IS NOT NULL;
id_body_address = foreach emails generate message_id, body, from.address as address;

/* Project and flatten to message_id/address/token, then apply a basic filter */
Expand Down

0 comments on commit 0926a40

Please sign in to comment.