Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Update, with Pig processing.

  • Loading branch information...
commit 12585f7123506cbb587870a65cdf651cb87c5c46 1 parent 832f4c8
@rjurney authored
View
17 README.md
@@ -44,3 +44,20 @@ Execute:
### Processing via Pig
+Generating to/from pairs from all emails:
+
+ REGISTER /me/pig/build/ivy/lib/Pig/avro-1.5.3.jar
+ REGISTER /me/pig/build/ivy/lib/Pig/json-simple-1.1.jar
+ REGISTER /me/pig/contrib/piggybank/java/piggybank.jar
+ REGISTER /me/pig/build/ivy/lib/Pig/jackson-core-asl-1.7.3.jar
+ REGISTER /me/pig/build/ivy/lib/Pig/jackson-mapper-asl-1.7.3.jar
+
+ DEFINE AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage();
+ rmf '/tmp/mail_pairs.avro'
+
+ messages = LOAD '/tmp/10000_emails.avro' USING AvroStorage();
+ smaller = FOREACH messages GENERATE from, to;
+ pairs = FOREACH smaller GENERATE from, FLATTEN(to) AS to:chararray;
+
+ STORE pairs INTO '/tmp/mail_pairs.avro' USING AvroStorage();
+
View
22 setup.sh
@@ -1,13 +1,8 @@
-pig_version=0.9.1
-voldemort_version=0.90.1
-
-echo "Installing Pig ${pig_version}..."
+echo "Installing Pig from trunk..."
mkdir lib
cd lib
-wget http://mirror.olnevhost.net/pub/apache//pig/pig-${pig_version}/pig-${pig_version}.tar.gz
-tar -xvzf pig-${pig_version}.tar.gz
-rm pig-${pig_version}.tar.gz
-cd pig-${pig_version}
+git clone https://github.com/apache/pig.git
+cd pig
# Patch PIG-2411 so we can use the AvroStorage UDF - see https://issues.apache.org/jira/browse/PIG-2411
cp ../../src/avro/avrobug.patch .
@@ -19,13 +14,12 @@ ant
cd ../../../..
echo "Setting up pig environment..."
-export CLASSPATH=$CLASSPATH:$PATH/lib/pig-${pig_version}/build/ivy/lib/Pig/avro-1.4.1.jar\
-:$PATH/lib/pig-${pig_version}/build/ivy/lib/Pig/json-simple-1.1.jar\
-:$PATH/lib/pig-${pig_version}/contrib/piggybank/java/piggybank.jar\
-:$PATH/lib/pig-${pig_version}/build/ivy/lib/Pig/jackson-core-asl-1.6.0.jar\
-:$PATH/lib/pig-${pig_version}/build/ivy/lib/Pig/jackson-mapper-asl-1.6.0.jar
+export CLASSPATH=$CLASSPATH:$PATH/lib/pig/build/ivy/lib/Pig/avro-1.4.1.jar\
+:$PATH/lib/pig/build/ivy/lib/Pig/json-simple-1.1.jar\
+:$PATH/lib/pig/contrib/piggybank/java/piggybank.jar\
+:$PATH/lib/pig/build/ivy/lib/Pig/jackson-core-asl-1.6.0.jar\
+:$PATH/lib/pig/build/ivy/lib/Pig/jackson-mapper-asl-1.6.0.jar
-wget http://google-mail-xoauth-tools.googlecode.com/svn/trunk/python/xoauth.py
cd ..
echo "Setup done!"
View
0  src/pig/test.pig → src/pig/pairs.pig
File renamed without changes
Please sign in to comment.
Something went wrong with that request. Please try again.