Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updating README, adding wrappers for main scalding classes with addit…
…ional functionality
- Loading branch information
Kian Wilcox
committed
Aug 8, 2012
1 parent
ac2eee8
commit 8ddac87
Showing
2 changed files
with
62 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
59 changes: 59 additions & 0 deletions
59
src/main/scala/com/stumbleupon/scalding/extensions/ScaldingWrappers.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package com.stumbleupon.scalding.extensions | ||
|
||
import scala.util.matching.Regex | ||
import cascading.pipe.Pipe | ||
import cascading.tuple.Fields | ||
import com.twitter.scalding._ | ||
import cascading.flow._ | ||
|
||
class PipeWrapper(input:Pipe) extends java.io.Serializable { | ||
import Dsl._ | ||
import ScaldingWrapperConversions._ | ||
|
||
def pipe = input | ||
|
||
|
||
// groups by the given fields, discarding any groups which match the predicate | ||
def discardGroupsWhere[A](f:Fields)(fn: A => Boolean) | ||
(implicit conv : TupleConverter[A]) : RichPipe = { | ||
conv.assertArityMatches(f) | ||
input.joinWithSmaller((f -> f), | ||
input.groupBy(f) { _.count((f -> new Fields('__count__.name)))(fn)}.filter(new Fields('__count__.name)) { | ||
count: Long => count == 0 | ||
}) | ||
} | ||
|
||
// groups by the given fields, discarding any groups which do not match the predicate | ||
def filterGroupsWhere[A](f:Fields)(fn: A => Boolean) | ||
(implicit conv : TupleConverter[A]) : RichPipe = { | ||
conv.assertArityMatches(f) | ||
input.joinWithSmaller((f -> f), | ||
input.groupBy(f) { _.count((f -> new Fields('__count__.name)))(fn)}.filter(new Fields('__count__.name)) { | ||
count: Long => count > 0 | ||
}) | ||
} | ||
|
||
} | ||
|
||
class GroupBuilderWrapper(val group:GroupBuilder) { | ||
|
||
} | ||
|
||
class JobWrapper(val job:Job) { | ||
|
||
} | ||
|
||
class StumbleJob(args:Args) extends Job(args) { | ||
|
||
|
||
} | ||
|
||
object ScaldingWrapperConversions { | ||
import Dsl._ | ||
implicit def inputToPipeWrapper(input:Pipe) = new PipeWrapper(input) | ||
implicit def pipeWrapperToPipe(input:PipeWrapper) = input.pipe | ||
implicit def wrapGroupBuilder(group:GroupBuilder) = new GroupBuilderWrapper(group) | ||
implicit def groupBuilderWrapperToGroup(group:GroupBuilderWrapper) = group.group | ||
implicit def wrapJob(job:Job) = new JobWrapper(job) | ||
implicit def jobWrapperToJob(job:JobWrapper) = job.job | ||
} |