forked from baeeq/incubator-spark
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
Add WordCountExample and a walk-through to the README
- Loading branch information
Evan Chan
committed
Dec 8, 2013
1 parent
c39e403
commit 37f0297
Showing
2 changed files
with
106 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
34 changes: 34 additions & 0 deletions
34
jobserver/src/test/scala/spark.jobserver/WordCountExample.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package spark.jobserver | ||
|
||
import com.typesafe.config.{Config, ConfigFactory} | ||
import org.apache.spark._ | ||
import org.apache.spark.SparkContext._ | ||
import scala.util.Try | ||
|
||
/**
 * A super-simple Spark job example that implements the SparkJob trait and can be submitted
 * to the job server.
 *
 * Set the config with the sentence whose words should be counted:
 * {{{
 * input.string = "adsfasdf asdkf safksf a sdfa"
 * }}}
 *
 * validate() returns SparkJobInvalid if input.string cannot be read from the config.
 */
object WordCountExample extends SparkJob {

  /**
   * Standalone entry point: runs the job against a local SparkContext using a built-in
   * sample sentence and prints the resulting word counts.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[4]", "WordCountExample")
    try {
      // An empty config has no input.string, so runJob would throw on getString;
      // provide a sample sentence so the standalone run actually demonstrates the job.
      val config = ConfigFactory.parseString("input.string = \"a b c a b see\"")
      // Apply the same validation gate used by this job before running it.
      val validation = validate(sc, config)
      if (validation == SparkJobValid) {
        val results = runJob(sc, config)
        println("Result is " + results)
      } else {
        println("Invalid job config: " + validation)
      }
    } finally {
      // Always release the context, even if the job fails.
      sc.stop()
    }
  }

  /**
   * Checks that the config contains a readable input.string entry.
   *
   * @param sc     the Spark context the job would run on (not used by this check)
   * @param config the job configuration
   * @return SparkJobValid when input.string can be read, otherwise SparkJobInvalid
   *         carrying an explanatory message
   */
  override def validate(sc: SparkContext, config: Config): SparkJobValidation = {
    // getString throws when the key is absent or not a string; Try turns that into the
    // invalid result instead of propagating the exception.
    Try(config.getString("input.string"))
      .map(_ => SparkJobValid)
      .getOrElse(SparkJobInvalid("No input.string config param"))
  }

  /**
   * Splits input.string on single spaces and counts the occurrences of each token.
   *
   * @param sc     the Spark context used to distribute the tokens
   * @param config the job configuration; must contain input.string
   * @return a Map from token to its number of occurrences, collected on the driver
   */
  override def runJob(sc: SparkContext, config: Config): Any = {
    val tokens = config.getString("input.string").split(" ").toSeq
    val words = sc.parallelize(tokens)
    words.map(word => (word, 1)).reduceByKey(_ + _).collect().toMap
  }
}