Skip to content

Commit

Permalink
Update, with Pig processing.
Browse files Browse the repository at this point in the history
  • Loading branch information
rjurney committed Dec 21, 2011
1 parent 832f4c8 commit 12585f7
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 14 deletions.
17 changes: 17 additions & 0 deletions README.md
Expand Up @@ -44,3 +44,20 @@ Execute:

### Processing via Pig

Generate (from, to) pairs from all emails:

-- Register the jars this script relies on: Avro, json-simple, Piggybank and Jackson.
REGISTER /me/pig/build/ivy/lib/Pig/avro-1.5.3.jar
REGISTER /me/pig/build/ivy/lib/Pig/json-simple-1.1.jar
REGISTER /me/pig/contrib/piggybank/java/piggybank.jar
REGISTER /me/pig/build/ivy/lib/Pig/jackson-core-asl-1.7.3.jar
REGISTER /me/pig/build/ivy/lib/Pig/jackson-mapper-asl-1.7.3.jar

-- Give the Piggybank Avro storage class the short alias AvroStorage.
DEFINE AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage();
-- Remove the output path from any earlier run before storing to it again below.
rmf '/tmp/mail_pairs.avro'

-- Load the emails, keep only the from and to fields, then flatten `to`.
-- NOTE(review): presumably `to` is a bag of recipients, so FLATTEN yields one
-- (from, to) row per recipient -- confirm against the Avro schema.
messages = LOAD '/tmp/10000_emails.avro' USING AvroStorage();
smaller = FOREACH messages GENERATE from, to;
pairs = FOREACH smaller GENERATE from, FLATTEN(to) AS to:chararray;

-- Write the resulting pairs out through AvroStorage.
STORE pairs INTO '/tmp/mail_pairs.avro' USING AvroStorage();

22 changes: 8 additions & 14 deletions setup.sh
@@ -1,13 +1,8 @@
pig_version=0.9.1
voldemort_version=0.90.1

echo "Installing Pig ${pig_version}..."
echo "Installing Pig from trunk..."
mkdir lib
cd lib
wget http://mirror.olnevhost.net/pub/apache//pig/pig-${pig_version}/pig-${pig_version}.tar.gz
tar -xvzf pig-${pig_version}.tar.gz
rm pig-${pig_version}.tar.gz
cd pig-${pig_version}
git clone https://github.com/apache/pig.git
cd pig

# Patch PIG-2411 so we can use the AvroStorage UDF - see https://issues.apache.org/jira/browse/PIG-2411
cp ../../src/avro/avrobug.patch .
Expand All @@ -19,13 +14,12 @@ ant
cd ../../../..

echo "Setting up pig environment..."
export CLASSPATH=$CLASSPATH:$PATH/lib/pig-${pig_version}/build/ivy/lib/Pig/avro-1.4.1.jar\
:$PATH/lib/pig-${pig_version}/build/ivy/lib/Pig/json-simple-1.1.jar\
:$PATH/lib/pig-${pig_version}/contrib/piggybank/java/piggybank.jar\
:$PATH/lib/pig-${pig_version}/build/ivy/lib/Pig/jackson-core-asl-1.6.0.jar\
:$PATH/lib/pig-${pig_version}/build/ivy/lib/Pig/jackson-mapper-asl-1.6.0.jar
# Append the Avro, json-simple, Piggybank and Jackson jars from the Pig checkout
# under lib/pig to the Java CLASSPATH.
# NOTE(review): $PATH is the shell's colon-separated executable search path, not
# a directory, so using it as a prefix is unlikely to produce valid jar paths --
# presumably the project root was intended; confirm and replace.
# NOTE(review): the README's Pig example registers avro-1.5.3 and jackson 1.7.3
# jars from this same build directory, while the versions below are 1.4.1 and
# 1.6.0 -- verify which jars the trunk build actually produces.
export CLASSPATH=$CLASSPATH:$PATH/lib/pig/build/ivy/lib/Pig/avro-1.4.1.jar\
:$PATH/lib/pig/build/ivy/lib/Pig/json-simple-1.1.jar\
:$PATH/lib/pig/contrib/piggybank/java/piggybank.jar\
:$PATH/lib/pig/build/ivy/lib/Pig/jackson-core-asl-1.6.0.jar\
:$PATH/lib/pig/build/ivy/lib/Pig/jackson-mapper-asl-1.6.0.jar

wget http://google-mail-xoauth-tools.googlecode.com/svn/trunk/python/xoauth.py
cd ..

echo "Setup done!"
File renamed without changes.

0 comments on commit 12585f7

Please sign in to comment.