Permalink
Browse files

Merge branch 'release/1.10.0'

  • Loading branch information...
2 parents a42b3c3 + c7a4ab4 commit f9633efcd6bcacc75debfef4255d3dda27adca73 @nathanmarz nathanmarz committed Jul 14, 2012
View
@@ -4,11 +4,13 @@
language: clojure
lein: lein2
-script: lein2 compile, midje
+script: lein2 do compile, midje
branches:
only:
- develop
- master
+jdk:
+ - openjdk6
notifications:
email:
- nathanmarz@gmail.com
View
@@ -1,3 +1,9 @@
+### 1.10.0
+
+* Upgraded Kryo to 2.16 (via `cascading.kryo` and `carbonite` upgrades)
+* Redid JCascalog interface with a fluent API
+* Added PredicateMacroTemplate to JCascalog
+
### 1.9.0
* Cascalog now uses the Apache 2.0 license (formerly used GNU)
View
@@ -26,9 +26,21 @@ Cascalog is hosted at [Clojars](http://clojars.org/cascalog). Clojars is a maven
To include Cascalog in your leiningen or cake project, add the following to your `project.clj`:
+General
+
[cascalog "1.9.0"] ;; under :dependencies
[org.apache.hadoop/hadoop-core "0.20.2-dev"] ;; under :dev-dependencies
+Leiningen 2.0
+
+ :dependencies [[cascalog "1.9.0"]]
+ :profiles { :dev {:dependencies [[org.apache.hadoop/hadoop-core "0.20.2-dev"]]}}
+
+Leiningen < 2.0
+
+ :dependencies [[cascalog "1.9.0"]]
+ :dev-dependencies [[org.apache.hadoop/hadoop-core "0.20.2-dev"]]
+
Note that Cascalog is compatible with Clojure 1.2.0, 1.2.1, 1.3.0, and 1.4.0.
# Documentation and Issue Tracker
View
@@ -4,14 +4,14 @@
:exclusions [org.codehaus.janino/janino
org.apache.hadoop/hadoop-core]]
[org.clojure/tools.macro "0.1.1"]
- [cascading.kryo "0.3.1"]
- [cascalog/carbonite "1.2.1"]
+ [cascading.kryo "0.4.0"]
+ [cascalog/carbonite "1.3.0"]
[log4j/log4j "1.2.16"]
[hadoop-util "0.2.8"]
[com.twitter/maple "0.2.0"]
[jackknife "0.1.2"]])
-(defproject cascalog/cascalog "1.9.0"
+(defproject cascalog/cascalog "1.10.0"
:description "Hadoop without the Hassle."
:url "http://www.cascalog.org"
:license {:name "Eclipse Public License"
View
@@ -210,15 +210,18 @@
"Executes one or more queries and returns a seq of seqs of tuples
back, one for each subquery given.
- Syntax: (??- sink1 query1 sink2 query2 ...)"
- [& subqueries]
- ;; TODO: should be checking for flow name here
- (io/with-fs-tmp [fs tmp]
- (hadoop/mkdirs fs tmp)
- (let [outtaps (for [q subqueries] (hfs-seqfile (str tmp "/" (u/uuid))))
- bindings (mapcat vector outtaps subqueries)]
- (apply ?- bindings)
- (doall (map rules/get-sink-tuples outtaps)))))
+ Syntax: (??- query1 query2 ...) or (??- query-name query1 query2 ...)
+
+ If the first argument is a string, that will be used as the name
+ for the query and will show up in the JobTracker UI."
+ [& args]
+ (let [[name [& subqueries]] (rules/parse-exec-args args)]
+ (io/with-fs-tmp [fs tmp]
+ (hadoop/mkdirs fs tmp)
+ (let [outtaps (for [q subqueries] (hfs-seqfile (str tmp "/" (u/uuid))))
+ bindings (mapcat vector outtaps subqueries)]
+ (apply ?- name bindings)
+ (doall (map rules/get-sink-tuples outtaps))))))
(defmacro ?<-
"Helper that both defines and executes a query in a single call.
@@ -9,7 +9,7 @@
[cascading.operation Filter]
[cascading.tuple Fields]
[clojure.lang IFn]
- [jcascalog PredicateMacro Subquery ClojureOp]
+ [jcascalog PredicateMacro Subquery ClojureOp PredicateMacroTemplate]
[cascalog ClojureParallelAggregator ClojureBuffer
ClojureBufferCombiner CombinerSpec CascalogFunction
CascalogFunctionExecutor CascadingFilterToFunction
@@ -169,6 +169,7 @@
(defn predicate-macro? [p]
(or (var? p)
(instance? PredicateMacro p)
+ (instance? PredicateMacroTemplate p)
(instance? Subquery p)
(instance? ClojureOp p)
(and (map? p) (= :predicate-macro (:type p)))
@@ -20,7 +20,7 @@
[cascalog CombinerSpec ClojureCombiner ClojureCombinedAggregator Util
ClojureParallelAgg]
[org.apache.hadoop.mapred JobConf]
- [jcascalog Predicate Subquery PredicateMacro ClojureOp]
+ [jcascalog Predicate Subquery PredicateMacro ClojureOp PredicateMacroTemplate]
[java.util ArrayList]))
;; infields for a join are the names of the join fields
@@ -572,7 +572,8 @@
new-name))
(defn- pred-macro-updater [[replacements ret] [op vars]]
- (let [newvars (postwalk #(if (v/cascalog-var? %)
+ (let [vars (vec vars) ; in case it's a java data structure
+ newvars (postwalk #(if (v/cascalog-var? %)
(new-var-name! replacements %)
%)
vars)]
@@ -625,6 +626,9 @@
(instance? PredicateMacro p)
(.getPredicates p (to-jcascalog-fields invars) (to-jcascalog-fields outvars))
+ (instance? PredicateMacroTemplate p)
+ [[(.getCompiledPredMacro p) vars]]
+
:else
((:pred-fn p) invars outvars))))
@@ -7,20 +7,4 @@
public static Object REVERSE = Keyword.intern("reverse");
public static Object TRAP = Keyword.intern("trap");
public static Object DISTINCT = Keyword.intern("distinct");
-
- public static Predicate distinct(boolean shouldDistinct) {
- return new Predicate(DISTINCT, new Fields(shouldDistinct));
- }
-
- public static Predicate sort(Fields sortFields) {
- return new Predicate(SORT, sortFields);
- }
-
- public static Predicate reverse(boolean shouldReverse) {
- return new Predicate(REVERSE, new Fields(shouldReverse));
- }
-
- public static Predicate trap(Object trapTap) {
- return new Predicate(TRAP, new Fields(trapTap));
- }
}
@@ -5,25 +5,31 @@
import java.util.List;
public class Predicate {
- List<Object> _fieldsDeclaration = new ArrayList<Object>();
+ List<Object> _initialFields;
+ List<Object> _outFields = null;
Object _op;
- public Predicate(Object op, Fields defaultFields) {
+ public Predicate(Object op, List<Object> initialFields) {
_op = op;
- _fieldsDeclaration.addAll(defaultFields);
+ _initialFields = initialFields;
}
- public Predicate(Object op, Fields infields, Fields outFields) {
+ public Predicate(Object op, List<Object> inFields, List<Object> outFields) {
_op = op;
- _fieldsDeclaration.addAll(infields);
- _fieldsDeclaration.add(Keyword.intern(">"));
- _fieldsDeclaration.addAll(outFields);
- }
+ _initialFields = inFields;
+ _outFields = outFields;
+ }
public List<Object> toRawCascalogPredicate() {
+ List<Object> fieldsDeclaration = new ArrayList<Object>();
+ fieldsDeclaration.addAll(_initialFields);
+ if(_outFields != null) {
+ fieldsDeclaration.add(Keyword.intern(">"));
+ fieldsDeclaration.addAll(_outFields);
+ }
List<Object> pred = new ArrayList<Object>();
- pred.add(_op); // the op
- pred.add(_fieldsDeclaration);
+ pred.add(_op);
+ pred.add(fieldsDeclaration);
return pred;
}
}
@@ -0,0 +1,81 @@
+package jcascalog;
+
+import cascalog.Util;
+import clojure.lang.Keyword;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class PredicateMacroTemplate {
+ public static PredicateMacroTemplateBuilder build(String... inFields) {
+ return build(Arrays.asList(inFields));
+ }
+
+ public static PredicateMacroTemplateBuilder build(List<String> inFields) {
+ return new PredicateMacroTemplateBuilder(inFields);
+ }
+
+ public static class PredicateMacroTemplateBuilder {
+ List<String> _inFields;
+
+ public PredicateMacroTemplateBuilder(List<String> inFields) {
+ _inFields = inFields;
+ }
+
+ public PredicateMacroTemplate out(String... outFields) {
+ return out(Arrays.asList(outFields));
+ }
+
+ public PredicateMacroTemplate out(List<String> outFields) {
+ return new PredicateMacroTemplate(_inFields, outFields);
+ }
+ }
+
+ List<String> _inFields;
+ List<String> _outFields;
+ List<Predicate> _preds = new ArrayList<Predicate>();
+ Predicate _currPred = null;
+
+ public PredicateMacroTemplate(List<String> inFields, List<String> outFields) {
+ _inFields = inFields;
+ _outFields = outFields;
+ }
+
+ public PredicateMacroTemplate predicate(Object op, Object... fields) {
+ return predicate(op, Arrays.asList(fields));
+ }
+
+ public PredicateMacroTemplate predicate(Object op, List<Object> fields) {
+ _currPred = new Predicate(op, fields);
+ _preds.add(_currPred);
+ return this;
+ }
+
+ public PredicateMacroTemplate predicate(Predicate p) {
+ _preds.add(p);
+ return this;
+ }
+
+ public PredicateMacroTemplate out(Object... fields) {
+ return out(Arrays.asList(fields));
+ }
+
+ public PredicateMacroTemplate out(List<Object> fields) {
+ if(_currPred==null) {
+ throw new RuntimeException("Cannot declare outfields for no predicate");
+ } else {
+ _currPred._outFields = fields;
+ _currPred = null;
+ return this;
+ }
+ }
+
+ public Object getCompiledPredMacro() {
+ List<Object> fieldsDeclaration = new ArrayList<Object>();
+ fieldsDeclaration.addAll(_inFields);
+ fieldsDeclaration.add(Keyword.intern(">"));
+ fieldsDeclaration.addAll(_outFields);
+ return Util.bootSimpleFn("cascalog.rules", "build-rule")
+ .invoke(fieldsDeclaration, _preds);
+ }
+}
@@ -1,22 +1,54 @@
package jcascalog;
import cascalog.Util;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class Subquery {
- Object _compiled;
+ List<Predicate> _preds = new ArrayList<Predicate>();
+ Fields _outFields;
+ Predicate _currPred = null;
- public Subquery(Fields outFields, Predicate... preds) {
- this(outFields, Arrays.asList(preds));
+ public Subquery(String... fields) {
+ this(Arrays.asList(fields));
}
-
- public Subquery(Fields outFields, List<Predicate> preds) {
- _compiled = Util.bootSimpleFn("cascalog.rules", "build-rule")
- .invoke(outFields, preds);
+
+ public Subquery(List<String> fields) {
+ _outFields = new Fields((List) fields);
}
public Object getCompiledSubquery() {
- return _compiled;
+ return Util.bootSimpleFn("cascalog.rules", "build-rule")
+ .invoke(_outFields, _preds);
+ }
+
+ public Subquery predicate(Object op, Object... fields) {
+ return predicate(op, Arrays.asList(fields));
+ }
+
+ public Subquery predicate(Object op, List<Object> fields) {
+ _currPred = new Predicate(op, fields);
+ _preds.add(_currPred);
+ return this;
+ }
+
+ public Subquery predicate(Predicate p) {
+ _preds.add(p);
+ return this;
+ }
+
+ public Subquery out(Object... fields) {
+ return out(Arrays.asList(fields));
+ }
+
+ public Subquery out(List<Object> fields) {
+ if(_currPred==null) {
+ throw new RuntimeException("Cannot declare outfields for no predicate");
+ } else {
+ _currPred._outFields = fields;
+ _currPred = null;
+ return this;
+ }
}
}
Oops, something went wrong.

0 comments on commit f9633ef

Please sign in to comment.