Permalink
Browse files

cascalog demo

  • Loading branch information...
0 parents commit 2fbb70ea77461850a33e86a8f597b10768bad65d @nathanmarz committed May 8, 2010
Showing with 142 additions and 0 deletions.
  1. +26 −0 .gitignore
  2. +25 −0 data/action/actions.txt
  3. +44 −0 data/follows/follows.txt
  4. +8 −0 project.clj
  5. +39 −0 src/clj/cascalog_demo/demo.clj
@@ -0,0 +1,26 @@
+CHILD
+CHILDMAKER
+NANNY
+_deps
+build
+target
+out
+lib
+pom.xml
+# use glob syntax.
+syntax: glob
+*.ser
+*.class
+*.jar
+*~
+*.bak
+*.off
+*.old
+.DS_Store
+*.#*
+*#*
+*.classpath
+*.project
+*.settings
+*.pyc
+*.dot
@@ -0,0 +1,25 @@
+nathan status=good 1273094927000
+nathan status=great 1273096922000
+nathan birthday 1273026922000
+nathan status=sleeping 1273029922000
+nathan status=great 1272029922000
+david newjob 1273096922000
+david status=good 1272029922000
+david travelling 1273094927000
+bob status=tired 1273026922000
+bob engaged 1272029922000
+dan basketball-captain 1273029922000
+dan status=great 1273029822000
+alex status=good 1273029922000
+danielle married 1273095924348
+danielle cooking-a-storm 1273094927000
+alice status=good 1273091927000
+alice rock-climbing 1273084927000
+chris hacking 1273084927000
+chris hacking 1273084827000
+mike tennis 1273084927000
+mike hacking 1273084827000
+nadia making-art 1273084927000
+vijay dancing 1273094927000
+vijay inventing 1273099927000
+jai lounging 1273084927000
@@ -0,0 +1,44 @@
+nathan david
+nathan bob
+nathan dan
+nathan alex
+nathan chris
+nathan mike
+david charles
+david nathan
+david alice
+david nadia
+david danielle
+david shiv
+david vijay
+bob alice
+bob nadia
+bob jai
+dan nathan
+dan mike
+dan chris
+dan jai
+dan vijay
+dan danielle
+alex nadia
+alex alice
+alex danielle
+alex nathan
+alice bob
+alice nathan
+alice vijay
+chris nathan
+chris vijay
+mike alice
+mike bob
+mike charles
+nadia dan
+nadia nathan
+nadia jai
+nadia vijay
+vijay jai
+vijay nadia
+vijay alice
+charles chris
+jai danielle
+jai mike
@@ -0,0 +1,8 @@
+;; Leiningen project definition for the Cascalog demo (version 1.0.0-SNAPSHOT).
+;; Clojure sources are read from src/clj. Runtime dependencies are Clojure and
+;; clojure-contrib 1.1.0 plus a Cascalog snapshot; hadoop-core is listed only
+;; as a dev dependency.
+;; NOTE(review): :namespaces presumably names the namespaces to AOT-compile
+;; (demo.clj declares :gen-class) -- confirm against the Leiningen version used.
+(defproject cascalog-demo "1.0.0-SNAPSHOT"
+ :source-path "src/clj"
+ :dependencies [[org.clojure/clojure "1.1.0"]
+ [org.clojure/clojure-contrib "1.1.0"]
+ [cascalog "1.0.1-SNAPSHOT"]
+ ]
+ :dev-dependencies [[org.apache.hadoop/hadoop-core "0.20.2-dev"]]
+ :namespaces [cascalog-demo.demo])
@@ -0,0 +1,39 @@
+(ns cascalog-demo.demo
+ (:use cascalog.api)
+ (:require [cascalog [workflow :as w] [predicate :as p] [vars :as v] [ops :as c]])
+ (:gen-class))
+
+;; Builds a Cascalog query over the text lines read from `dir`.
+;; `num-fields` output variables are generated with v/gen-nullable-vars, and
+;; each ?line is passed through c/re-parse with the regex [^\s]+ (runs of
+;; non-whitespace characters), binding the results to those variables.
+;; The query is declared with the (:distinct false) option.
+;; NOTE(review): presumably :distinct false keeps duplicate rows, and a line
+;; whose token count differs from `num-fields` may fail or yield nulls --
+;; neither is visible here; confirm against the Cascalog docs.
+(defn textline-parsed [dir num-fields]
+ (let [outargs (v/gen-nullable-vars num-fields)
+ source (hfs-textline dir)]
+ (<- outargs (source ?line) (c/re-parse [#"[^\s]+"] ?line :>> outargs) (:distinct false))))
+
+;; Converts the decimal string `num` into a long by calling the static method
+;; java.lang.Long.parseLong. A NumberFormatException propagates to the caller
+;; when `num` is not a valid long literal.
+(defn to-long [num] (. Long (parseLong num)))
+
+;; Query over the follows data stored under `dir`: every line is parsed into
+;; two fields by textline-parsed.
+(defn follows-data [dir]
+ (textline-parsed dir 2))
+
+;; Query over the action data stored under `dir`.
+;; Each line is parsed into three fields by textline-parsed; the third field
+;; (?time-str) is converted with to-long, and the query emits
+;; [?person ?action ?time]. Declared with the (:distinct false) option.
+;; NOTE(review): the sample data suggests ?time is epoch milliseconds --
+;; confirm with the data producer.
+(defn action-data [dir]
+ (let [source (textline-parsed dir 3)]
+ (<- [?person ?action ?time] (source ?person ?action ?time-str)
+ (to-long ?time-str :> ?time) (:distinct false))))
+
+;; Buffer operation defined with w/defbufferop. It receives `tuples` and
+;; returns a one-element vector holding the pr-str rendering of the first
+;; five of them.
+;; NOTE(review): presumably `tuples` is the whole group for one grouping key,
+;; already in the query's sort order -- confirm against the Cascalog docs.
+(w/defbufferop mk-feed [tuples]
+ [(pr-str (take 5 tuples))])
+
+;; Scores an action as `folls` divided by (age in days + 1), where the age is
+;; (now-ms - time-ms) divided by 86400000, the number of milliseconds in a day.
+;; NOTE(review): `div` is not defined in this file; presumably it is the
+;; floating-point division helper brought in by (:use cascalog.api) -- confirm.
+(defn action-score [now-ms folls time-ms]
+ (div folls (inc (div (- now-ms time-ms) 86400000))))
+
+;; Builds the news-feed query and sends its [?p ?feed] output to `output-tap`.
+;;
+;; follows-dir - location handed to follows-data (two-field follow pairs)
+;; action-dir - location handed to action-data (person, action, time)
+;;
+;; follower-count is a subquery that, for each value ?p in the second column
+;; of `follows`, emits c/count of the matching rows as ?c.
+;; The main query joins `follows`, `action` and follower-count on ?p2, computes
+;; ?score with action-score (using System/currentTimeMillis as the current
+;; time), declares (:sort ?score) with (:reverse true), and passes
+;; ?p2 ?action ?time to mk-feed to produce ?feed.
+;; NOTE(review): presumably a row "a b" in follows means a follows b, and
+;; ?<- both defines and executes the query -- confirm against the data
+;; producer and the Cascalog docs.
+(defn compute-news-feed [output-tap follows-dir action-dir]
+ (let [follows (follows-data follows-dir)
+ action (action-data action-dir)
+ follower-count (<- [?p ?c] (follows ?p2 ?p) (c/count ?c))]
+ (?<- output-tap [?p ?feed] (follows ?p ?p2) (action ?p2 ?action ?time)
+ (follower-count ?p2 ?folls)
+ (action-score (System/currentTimeMillis) ?folls ?time :> ?score)
+ (:sort ?score) (:reverse true)
+ (mk-feed ?p2 ?action ?time :> ?feed))))
+
+;; Command-line entry point. Takes the follows directory, the action directory
+;; and the output directory, wraps the output directory with hfs-textline, and
+;; runs compute-news-feed against it.
+(defn -main [follows-dir action-dir output-dir]
+ (let [sink (hfs-textline output-dir)] (compute-news-feed sink follows-dir action-dir)))
+

0 comments on commit 2fbb70e

Please sign in to comment.