Permalink
Browse files

Early work on code for blog post.

  • Loading branch information...
1 parent 90051b4 commit d4c6ae723c88789f75e868695be81e3e22518326 @rjurney committed Sep 15, 2012
Showing with 39 additions and 0 deletions.
  1. +6 −0 cassandra.txt
  2. +28 −0 cassandra_enron.pig
  3. +4 −0 env.sh
  4. 0 index.py
  5. +1 −0 words.py
View
@@ -0,0 +1,6 @@
+-- Create the enron keyspace, switch to it, and define the email column family
+-- with comparator, default_validation_class and key_validation_class all set to UTF8Type.
+create keyspace enron;
+use enron;
+create column family email with
+ comparator = UTF8Type and
+ default_validation_class = UTF8Type and
+ key_validation_class = UTF8Type;
View
@@ -0,0 +1,28 @@
+/* CassandraStorage and utilities */
+register /me/Software/apache-cassandra-1.1.5-src/build/apache-cassandra*.jar
+register /me/Software/apache-cassandra-1.1.5-src/lib/*.jar
+register /me/Software/apache-cassandra-1.1.5-src/build/lib/jars/*.jar /* */
+register /me/Software/pygmalion/udf/target/pygmalion-1.1.0-SNAPSHOT.jar
+
+define CassandraStorage org.apache.cassandra.hadoop.pig.CassandraStorage();
+define FromCassandraBag org.pygmalion.udf.FromCassandraBag();
+define ToCassandraBag org.pygmalion.udf.ToCassandraBag();
+
+/* AvroStorage */
+register /me/Software/pig/build/ivy/lib/Pig/avro-1.5.3.jar
+register /me/Software/pig/build/ivy/lib/Pig/json-simple-1.1.jar
+register /me/Software/pig/contrib/piggybank/java/piggybank.jar
+
+define AvroStorage org.apache.pig.piggybank.storage.avro.AvroStorage();
+
+/* Load the Enron emails from the Avro file using AvroStorage. */
+emails = load '/me/Data/enron.avro' using AvroStorage();
+/* Drop records that have no message_id. */
+emails = filter emails by message_id is not null;
+/* Keep only the message_id and body fields. */
+/* NOTE(review): id_body is not referenced by the statements that follow in this script - confirm whether it is meant to be stored. */
+id_body = foreach emails generate message_id, body;
+
+/* Read rows through CassandraStorage and pull three named columns out of each row. */
+/* NOTE(review): the location below is pygmalion/account, not the Enron data loaded earlier in this script - presumably carried over from a Pygmalion example; confirm. */
+raw = LOAD 'cassandra://pygmalion/account' USING CassandraStorage();
+/* Presumably FromCassandraBag selects the listed column names from the columns bag, in the order given - verify against the Pygmalion UDF documentation. */
+rows = FOREACH raw GENERATE key, FLATTEN(FromCassandraBag('first_name, last_name, birth_place', columns)) AS (
+ first_name:chararray,
+ last_name:chararray,
+ birth_place:chararray
+);
+/* Print the resulting relation; terminated with a semicolon like every other statement in the script. */
+dump rows;
View
4 env.sh
@@ -0,0 +1,4 @@
+# Environment variables exported before running the Pig script; the address points at localhost.
+# NOTE(review): the PIG_* variables are presumably read by CassandraStorage for its connection settings - confirm against the Cassandra Pig integration docs.
+export PIG_INITIAL_ADDRESS=localhost
+export PIG_RPC_PORT=9160
+export PIG_PARTITIONER=org.apache.cassandra.dht.RandomPartitioner
+export CASSANDRA_HOME=$HOME/apache-cassandra-1.1.5-src #makeyourown
View
No changes.
View
@@ -0,0 +1 @@
+import nltk

0 comments on commit d4c6ae7

Please sign in to comment.