
multi-tracker branch now compiles and runs, but it crashes right before the end. The same problem is also seen in the master branch (in the ChainedStreaming implementation).
1 parent 4fdd482 · commit 0d67bc1cee62594f2bc9c24734b1cf90c88b8bce · Mosharaf Chowdhury committed on Oct 12, 2010
Showing with 1,479 additions and 16,160 deletions.
  1. +9 −5 Makefile
  2. +27 −2 README
  3. +1 −0 conf/java-opts
  4. +8 −0 conf/log4j.properties
  5. +13 −0 conf/spark-env.sh
  6. +31 −9 run
  7. +1 −2 spark-executor
  8. +7 −11 src/examples/BroadcastTest.scala
  9. +0 −5 src/examples/SparkALS.scala
  10. +1 −1 src/examples/Vector.scala
  11. +7 −5 src/scala/spark/Accumulators.scala
  12. +476 −476 src/scala/spark/Broadcast.scala
  13. +4 −4 src/scala/spark/ClosureCleaner.scala
  14. +18 −14 src/scala/spark/Executor.scala
  15. +22 −11 src/scala/spark/HdfsFile.scala
  16. +14 −8 src/scala/spark/LocalScheduler.scala
  17. +49 −0 src/scala/spark/Logging.scala
  18. +135 −79 src/scala/spark/{NexusScheduler.scala → MesosScheduler.scala}
  19. +8 −7 src/scala/spark/ParallelArray.scala
  20. +156 −40 src/scala/spark/RDD.scala
  21. +1 −0 src/scala/spark/Scheduler.scala
  22. +16 −10 src/scala/spark/SparkContext.scala
  23. +13 −0 src/scala/spark/Split.scala
  24. +1 −1 src/scala/spark/Task.scala
  25. +26 −1 src/scala/spark/Utils.scala
  26. +77 −0 src/scala/spark/repl/ClassServer.scala
  27. +32 −10 src/scala/spark/repl/ExecutorClassLoader.scala
  28. +30 −14 src/scala/spark/repl/SparkInterpreter.scala
  29. +1 −1 src/scala/spark/repl/SparkInterpreterLoop.scala
  30. +0 −21 src/scala/ubiquifs/Header.scala
  31. +0 −49 src/scala/ubiquifs/Master.scala
  32. +0 −14 src/scala/ubiquifs/Message.scala
  33. +0 −141 src/scala/ubiquifs/Slave.scala
  34. +0 −11 src/scala/ubiquifs/UbiquiFS.scala
  35. +0 −12 src/scala/ubiquifs/Utils.scala
  36. BIN third_party/apache-log4j-1.2.16/log4j-1.2.16.jar
  37. BIN third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar
  38. 0 third_party/{google-collect-1.0-rc5 → guava-r06}/COPYING
  39. +28 −0 third_party/guava-r06/README
  40. BIN third_party/guava-r06/guava-r06.jar
  41. +0 −248 third_party/hadoop-0.20.0/contrib/hod/CHANGES.txt
  42. +0 −104 third_party/hadoop-0.20.0/contrib/hod/README
  43. +0 −1 third_party/hadoop-0.20.0/contrib/hod/bin/VERSION
  44. +0 −31 third_party/hadoop-0.20.0/contrib/hod/bin/checknodes
  45. +0 −577 third_party/hadoop-0.20.0/contrib/hod/bin/hod
  46. +0 −183 third_party/hadoop-0.20.0/contrib/hod/bin/hodcleanup
  47. +0 −287 third_party/hadoop-0.20.0/contrib/hod/bin/hodring
  48. +0 −349 third_party/hadoop-0.20.0/contrib/hod/bin/ringmaster
  49. +0 −11 third_party/hadoop-0.20.0/contrib/hod/bin/verify-account
  50. +0 −81 third_party/hadoop-0.20.0/contrib/hod/build.xml
  51. +0 −46 third_party/hadoop-0.20.0/contrib/hod/conf/hodrc
  52. +0 −172 third_party/hadoop-0.20.0/contrib/hod/config.txt
  53. +0 −233 third_party/hadoop-0.20.0/contrib/hod/getting_started.txt
  54. +0 −16 third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/__init__.py
  55. +0 −104 third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/goldAllocationManager.py
  56. +0 −15 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/__init__.py
  57. +0 −27 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/allocationManagerUtil.py
  58. +0 −298 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/desc.py
  59. +0 −72 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/descGenerator.py
  60. +0 −228 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/hodsvc.py
  61. +0 −788 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/logger.py
  62. +0 −45 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/miniHTMLParser.py
  63. +0 −26 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/nodepoolutil.py
  64. +0 −1,058 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/setup.py
  65. +0 −621 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/socketServers.py
  66. +0 −176 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/tcp.py
  67. +0 −389 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/threads.py
  68. +0 −1,266 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/types.py
  69. +0 −309 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/util.py
  70. +0 −57 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/xmlrpc.py
  71. +0 −18 third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/__init__.py
  72. +0 −310 third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/hdfs.py
  73. +0 −272 third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/mapred.py
  74. +0 −266 third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/service.py
  75. +0 −15 third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/__init__.py
  76. +0 −747 third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hadoop.py
  77. +0 −754 third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hod.py
  78. +0 −128 third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/nodePool.py
  79. +0 −15 third_party/hadoop-0.20.0/contrib/hod/hodlib/HodRing/__init__.py
  80. +0 −928 third_party/hadoop-0.20.0/contrib/hod/hodlib/HodRing/hodRing.py
  81. +0 −15 third_party/hadoop-0.20.0/contrib/hod/hodlib/NodePools/__init__.py
  82. +0 −334 third_party/hadoop-0.20.0/contrib/hod/hodlib/NodePools/torque.py
  83. +0 −15 third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/__init__.py
  84. +0 −218 third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/idleJobTracker.py
  85. +0 −1,019 third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/ringMaster.py
  86. +0 −15 third_party/hadoop-0.20.0/contrib/hod/hodlib/Schedulers/__init__.py
  87. +0 −175 third_party/hadoop-0.20.0/contrib/hod/hodlib/Schedulers/torque.py
  88. +0 −15 third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceProxy/__init__.py
  89. +0 −49 third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceProxy/serviceProxy.py
  90. +0 −15 third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceRegistry/__init__.py
  91. +0 −127 third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceRegistry/serviceRegistry.py
  92. +0 −16 third_party/hadoop-0.20.0/contrib/hod/hodlib/__init__.py
  93. +0 −22 third_party/hadoop-0.20.0/contrib/hod/ivy.xml
  94. +0 −5 third_party/hadoop-0.20.0/contrib/hod/ivy/libraries.properties
  95. +0 −57 third_party/hadoop-0.20.0/contrib/hod/support/checklimits.sh
  96. +0 −212 third_party/hadoop-0.20.0/contrib/hod/support/logcondense.py
  97. +0 −15 third_party/hadoop-0.20.0/contrib/hod/testing/__init__.py
  98. +0 −33 third_party/hadoop-0.20.0/contrib/hod/testing/helper.py
  99. +0 −113 third_party/hadoop-0.20.0/contrib/hod/testing/lib.py
  100. +0 −83 third_party/hadoop-0.20.0/contrib/hod/testing/main.py
  101. +0 −123 third_party/hadoop-0.20.0/contrib/hod/testing/testHadoop.py
  102. +0 −310 third_party/hadoop-0.20.0/contrib/hod/testing/testHod.py
  103. +0 −113 third_party/hadoop-0.20.0/contrib/hod/testing/testHodCleanup.py
  104. +0 −117 third_party/hadoop-0.20.0/contrib/hod/testing/testHodRing.py
  105. +0 −88 third_party/hadoop-0.20.0/contrib/hod/testing/testModule.py
  106. +0 −171 third_party/hadoop-0.20.0/contrib/hod/testing/testRingmasterRPCs.py
  107. +0 −99 third_party/hadoop-0.20.0/contrib/hod/testing/testThreads.py
  108. +0 −180 third_party/hadoop-0.20.0/contrib/hod/testing/testTypes.py
  109. +0 −62 third_party/hadoop-0.20.0/contrib/hod/testing/testUtil.py
  110. +0 −109 third_party/hadoop-0.20.0/contrib/hod/testing/testXmlrpc.py
  111. BIN third_party/hadoop-0.20.0/lib/slf4j-api-1.4.3.jar
  112. BIN third_party/hadoop-0.20.0/lib/slf4j-log4j12-1.4.3.jar
  113. BIN third_party/jetty-7.1.6.v20100715/jetty-server-7.1.6.v20100715.jar
  114. BIN third_party/jetty-7.1.6.v20100715/servlet-api-2.5.jar
  115. BIN third_party/mesos.jar
  116. BIN third_party/{scalacheck_2.8.0.RC3-1.7.jar → scalacheck_2.8.0-1.7.jar}
  117. +202 −0 third_party/scalatest-1.2/LICENSE
  118. +7 −0 third_party/scalatest-1.2/NOTICE
  119. +58 −0 third_party/scalatest-1.2/README.txt
  120. BIN third_party/{scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar → scalatest-1.2/scalatest-1.2.jar}
  121. BIN third_party/slf4j-1.6.1/slf4j-api-1.6.1.jar
  122. BIN third_party/slf4j-1.6.1/slf4j-log4j12-1.6.1.jar
Makefile
@@ -2,15 +2,19 @@ EMPTY =
SPACE = $(EMPTY) $(EMPTY)
# Build up classpath by concatenating some strings
-JARS = third_party/nexus.jar
+JARS = third_party/mesos.jar
JARS += third_party/asm-3.2/lib/all/asm-all-3.2.jar
JARS += third_party/colt.jar
-JARS += third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar
+JARS += third_party/guava-r06/guava-r06.jar
JARS += third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar
JARS += third_party/hadoop-0.20.0/lib/commons-logging-1.0.4.jar
-JARS += third_party/scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar
-JARS += third_party/scalacheck_2.8.0.RC3-1.7.jar
-JARS += third_party/FreePastry-2.1.jar
+JARS += third_party/scalatest-1.2/scalatest-1.2.jar
+JARS += third_party/scalacheck_2.8.0-1.7.jar
+JARS += third_party/jetty-7.1.6.v20100715/jetty-server-7.1.6.v20100715.jar
+JARS += third_party/jetty-7.1.6.v20100715/servlet-api-2.5.jar
+JARS += third_party/apache-log4j-1.2.16/log4j-1.2.16.jar
+JARS += third_party/slf4j-1.6.1/slf4j-api-1.6.1.jar
+JARS += third_party/slf4j-1.6.1/slf4j-log4j12-1.6.1.jar
CLASSPATH = $(subst $(SPACE),:,$(JARS))
SCALA_SOURCES = src/examples/*.scala src/scala/spark/*.scala src/scala/spark/repl/*.scala
README
@@ -1,15 +1,40 @@
-Spark requires Scala 2.8. This version has been tested with 2.8.0RC3.
+BUILDING
+
+Spark requires Scala 2.8. This version has been tested with 2.8.0.final.
To build and run Spark, you will need to have Scala's bin in your $PATH,
or you will need to set the SCALA_HOME environment variable to point
to where you've installed Scala. Scala must be accessible through one
-of these methods on Nexus slave nodes as well as on the master.
+of these methods on Mesos slave nodes as well as on the master.
To build Spark and the example programs, run make.
To run one of the examples, use ./run <class> <params>. For example,
./run SparkLR will run the Logistic Regression example. Each of the
example programs prints usage help if no params are given.
+All of the Spark samples take a <host> parameter that is the Mesos master
+to connect to. This can be a Mesos URL, or "local" to run locally with one
+thread, or "local[N]" to run locally with N threads.
+
Tip: If you are building Spark and examples repeatedly, export USE_FSC=1
to have the Makefile use the fsc compiler daemon instead of scalac.
+
+CONFIGURATION
+
+Spark can be configured through two files: conf/java-opts and conf/spark-env.sh.
+
+In java-opts, you can add flags to be passed to the JVM when running Spark.
+
+In spark-env.sh, you can set any environment variables you wish to be available
+when running Spark programs, such as PATH, SCALA_HOME, etc. There are also
+several Spark-specific variables you can set:
+- SPARK_CLASSPATH: Extra entries to be added to the classpath, separated by ":".
+- SPARK_MEM: Memory for Spark to use, in the format used by java's -Xmx option
 (for example, 200m means 200 MB, 1g means 1 GB, etc).
+- SPARK_LIBRARY_PATH: Extra entries to add to java.library.path for locating
+ shared libraries.
+- SPARK_JAVA_OPTS: Extra options to pass to JVM.
+
+Note that spark-env.sh must be a shell script (it must be executable and start
+with a #! header to specify the shell to use).
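
The <host> argument described in the README above is passed straight to the SparkContext constructor. The sketch below illustrates this; it is an assumption-based example (the class name LocalDemo, the two-argument constructor, and the parallelize/map/reduce chain are inferred from the bundled examples rather than taken from this diff):

    import spark.SparkContext

    object LocalDemo {
      def main(args: Array[String]) {
        // "local" runs with one thread, "local[4]" with four threads,
        // and a Mesos master URL connects to a cluster, as described above.
        val host = if (args.length > 0) args(0) else "local[2]"
        val sc = new SparkContext(host, "LocalDemo")
        // Distribute a small range over 4 slices and sum the doubled values.
        val sum = sc.parallelize(1 to 1000, 4).map(_ * 2).reduce(_ + _)
        println("Sum of doubles = " + sum)
      }
    }

It would be launched like the bundled examples, e.g. ./run LocalDemo local[2] (LocalDemo is hypothetical and is not part of this commit).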
conf/java-opts
@@ -0,0 +1 @@
+-Dspark.broadcast.masterHostAddress=127.0.0.1 -Dspark.broadcast.masterTrackerPort=11111 -Dspark.broadcast.blockSize=1024 -Dspark.broadcast.maxRetryCount=2 -Dspark.broadcast.serverSocketTimout=50000 -Dspark.broadcast.dualMode=false
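
These -D flags become plain JVM system properties. A minimal sketch of how the broadcast implementation could read them is shown below; the helper object, its name, and the defaults are assumptions for illustration, and the actual Broadcast.scala in this commit may consume the properties differently:

    // Sketch only: mirrors the property names set in conf/java-opts above.
    object BroadcastConfig {
      private def prop(name: String, default: String) =
        System.getProperty("spark.broadcast." + name, default)

      val masterHostAddress   = prop("masterHostAddress", "127.0.0.1")
      val masterTrackerPort   = prop("masterTrackerPort", "11111").toInt
      val blockSize           = prop("blockSize", "1024").toInt
      val maxRetryCount       = prop("maxRetryCount", "2").toInt
      // Keeps the original spelling of the key used in conf/java-opts.
      val serverSocketTimeout = prop("serverSocketTimout", "50000").toInt
      val dualMode            = prop("dualMode", "false").toBoolean
    }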
conf/log4j.properties
@@ -0,0 +1,8 @@
+# Set everything to be logged to the console
+log4j.rootCategory=INFO, console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Ignore messages below warning level from Jetty, because it's a bit verbose
+log4j.logger.org.eclipse.jetty=WARN
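
This commit also adds src/scala/spark/Logging.scala (49 lines, diff not shown on this page) alongside the log4j and slf4j jars. The trait below is only a minimal sketch of that general pattern, assuming the slf4j-log4j12 binding bundled in this commit; it is not the actual Logging.scala:

    import org.slf4j.{Logger, LoggerFactory}

    trait SimpleLogging {
      // One logger per concrete class; %c{1} in the pattern above prints its short name.
      @transient private lazy val log: Logger = LoggerFactory.getLogger(this.getClass)

      def logInfo(msg: => String) { if (log.isInfoEnabled) log.info(msg) }
      def logWarning(msg: => String) { log.warn(msg) }
      def logError(msg: => String, e: Throwable) { log.error(msg, e) }
    }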
conf/spark-env.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+# Set Spark environment variables for your site in this file. Some useful
+# variables to set are:
+# - MESOS_HOME, to point to your Mesos installation
+# - SCALA_HOME, to point to your Scala installation
+# - SPARK_CLASSPATH, to add elements to Spark's classpath
+# - SPARK_JAVA_OPTS, to add JVM options
+# - SPARK_MEM, to change the amount of memory used per node (this should
+# be in the same format as the JVM's -Xmx option, e.g. 300m or 1g).
+# - SPARK_LIBRARY_PATH, to add extra search paths for native libraries.
+
+MESOS_HOME=/home/mosharaf/Work/mesos
run
@@ -3,27 +3,49 @@
# Figure out where the Scala framework is installed
FWDIR=`dirname $0`
-# Set JAVA_OPTS to be able to load libnexus.so and set various other misc options
-export JAVA_OPTS="-Djava.library.path=$FWDIR/third_party:$FWDIR/src/native -Xms100m -Xmx750m"
+# Load environment variables from conf/spark-env.sh, if it exists
+if [ -e $FWDIR/conf/spark-env.sh ] ; then
+ . $FWDIR/conf/spark-env.sh
+fi
+
+if [ "x$MESOS_HOME" != "x" ] ; then
+ SPARK_CLASSPATH="$MESOS_HOME/lib/java/mesos.jar:$SPARK_CLASSPATH"
+ SPARK_LIBRARY_PATH="$MESOS_HOME/lib/java:$SPARK_LIBRARY_PATH"
+fi
+
+if [ "x$SPARK_MEM" == "x" ] ; then
+ SPARK_MEM="300m"
+fi
+
+# Set JAVA_OPTS to be able to load native libraries and to set heap size
+JAVA_OPTS="$SPARK_JAVA_OPTS"
+JAVA_OPTS+=" -Djava.library.path=$SPARK_LIBRARY_PATH:$FWDIR/third_party:$FWDIR/src/native"
+JAVA_OPTS+=" -Xms$SPARK_MEM -Xmx$SPARK_MEM"
+# Load extra JAVA_OPTS from conf/java-opts, if it exists
if [ -e $FWDIR/conf/java-opts ] ; then
JAVA_OPTS+=" `cat $FWDIR/conf/java-opts`"
fi
export JAVA_OPTS
# Build up classpath
-CLASSPATH=$FWDIR/build/classes
-CLASSPATH+=:$FWDIR/third_party/nexus.jar
+CLASSPATH="$SPARK_CLASSPATH:$FWDIR/build/classes"
+CLASSPATH+=:$FWDIR/conf
+CLASSPATH+=:$FWDIR/third_party/mesos.jar
CLASSPATH+=:$FWDIR/third_party/asm-3.2/lib/all/asm-all-3.2.jar
CLASSPATH+=:$FWDIR/third_party/colt.jar
-CLASSPATH+=:$FWDIR/third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar
+CLASSPATH+=:$FWDIR/third_party/guava-r06/guava-r06.jar
CLASSPATH+=:$FWDIR/third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar
-CLASSPATH+=:third_party/scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar
-CLASSPATH+=:third_party/scalacheck_2.8.0.RC3-1.7.jar
-CLASSPATH+=:$FWDIR/third_party/FreePastry-2.1.jar
+CLASSPATH+=:$FWDIR/third_party/scalatest-1.2/scalatest-1.2.jar
+CLASSPATH+=:$FWDIR/third_party/scalacheck_2.8.0-1.7.jar
+CLASSPATH+=:$FWDIR/third_party/jetty-7.1.6.v20100715/jetty-server-7.1.6.v20100715.jar
+CLASSPATH+=:$FWDIR/third_party/jetty-7.1.6.v20100715/servlet-api-2.5.jar
+CLASSPATH+=:$FWDIR/third_party/apache-log4j-1.2.16/log4j-1.2.16.jar
+CLASSPATH+=:$FWDIR/third_party/slf4j-1.6.1/slf4j-api-1.6.1.jar
+CLASSPATH+=:$FWDIR/third_party/slf4j-1.6.1/slf4j-log4j12-1.6.1.jar
for jar in $FWDIR/third_party/hadoop-0.20.0/lib/*.jar; do
CLASSPATH+=:$jar
done
-export CLASSPATH
+export CLASSPATH # Needed for spark-shell
if [ -n "$SCALA_HOME" ]; then
SCALA=${SCALA_HOME}/bin/scala
spark-executor
@@ -1,5 +1,4 @@
#!/bin/sh
-echo "In spark-executor"
FWDIR="`dirname $0`"
-echo Framework dir: $FWDIR
+echo "Running spark-executor with framework dir = $FWDIR"
exec $FWDIR/run spark.Executor
src/examples/BroadcastTest.scala
@@ -10,19 +10,15 @@ object BroadcastTest {
val slices = if (args.length > 1) args(1).toInt else 2
val num = if (args.length > 2) args(2).toInt else 1000000
- var arr1 = new Array[Int](num)
- for (i <- 0 until arr1.length)
- arr1(i) = i
+ var arr = new Array[Int](num)
+ for (i <- 0 until arr.length)
+ arr(i) = i
-// var arr2 = new Array[Int](num * 2)
-// for (i <- 0 until arr2.length)
-// arr2(i) = i
-
- val barr1 = spark.broadcast(arr1)
-// val barr2 = spark.broadcast(arr2)
+ val barr = spark.broadcast(arr)
spark.parallelize(1 to 10, slices).foreach {
-// i => println(barr1.value.size + barr2.value.size)
- i => println(barr1.value.size)
+ println("in task: barr = " + barr)
+ i => println(barr.value.size)
}
}
}
+
src/examples/SparkALS.scala
@@ -123,8 +123,6 @@ object SparkALS {
var msc = spark.broadcast(ms)
var usc = spark.broadcast(us)
for (iter <- 1 to ITERATIONS) {
- val start = System.nanoTime
-
println("Iteration " + iter + ":")
ms = spark.parallelize(0 until M, slices)
.map(i => updateMovie(i, msc.value(i), usc.value, Rc.value))
@@ -136,9 +134,6 @@ object SparkALS {
usc = spark.broadcast(us) // Re-broadcast us because it was updated
println("RMSE = " + rmse(R, ms, us))
println()
-
- val time = (System.nanoTime - start) / 1e9
- println( "This iteration took " + time + " s")
}
}
}
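
The lines removed above were ad-hoc per-iteration timing. If that measurement is ever wanted again, a small reusable helper along the following lines would do the same job; this is an illustrative assumption, not code from the commit:

    object Timing {
      // Runs the block, prints the elapsed wall-clock time in seconds, returns its result.
      def timed[A](label: String)(body: => A): A = {
        val start = System.nanoTime
        val result = body
        println(label + " took " + ((System.nanoTime - start) / 1e9) + " s")
        result
      }
    }

    // e.g. Timing.timed("Iteration " + iter) { /* body of the loop */ }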
src/examples/Vector.scala
@@ -57,7 +57,7 @@ object Vector {
implicit def doubleToMultiplier(num: Double) = new Multiplier(num)
implicit object VectorAccumParam extends spark.AccumulatorParam[Vector] {
- def add(t1: Vector, t2: Vector) = t1 + t2
+ def addInPlace(t1: Vector, t2: Vector) = t1 + t2
def zero(initialValue: Vector) = Vector.zeros(initialValue.length)
}
}
src/scala/spark/Accumulators.scala
@@ -4,15 +4,17 @@ import java.io._
import scala.collection.mutable.Map
-@serializable class Accumulator[T](initialValue: T, param: AccumulatorParam[T])
+@serializable class Accumulator[T](
+ @transient initialValue: T, param: AccumulatorParam[T])
{
val id = Accumulators.newId
- @transient var value_ = initialValue
+ @transient var value_ = initialValue // Current value on master
+ val zero = param.zero(initialValue) // Zero value to be passed to workers
var deserialized = false
Accumulators.register(this)
- def += (term: T) { value_ = param.add(value_, term) }
+ def += (term: T) { value_ = param.addInPlace(value_, term) }
def value = this.value_
def value_= (t: T) {
if (!deserialized) value_ = t
@@ -22,7 +24,7 @@ import scala.collection.mutable.Map
// Called by Java when deserializing an object
private def readObject(in: ObjectInputStream) {
in.defaultReadObject
- value_ = param.zero(initialValue)
+ value_ = zero
deserialized = true
Accumulators.register(this)
}
@@ -31,7 +33,7 @@ import scala.collection.mutable.Map
}
@serializable trait AccumulatorParam[T] {
- def add(t1: T, t2: T): T
+ def addInPlace(t1: T, t2: T): T
def zero(initialValue: T): T
}
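
The renamed addInPlace method, together with zero, is now the whole AccumulatorParam contract, and the constructor visible above takes an initial value plus a param object. The example below exercises that contract directly; it relies only on what this diff shows (the trait, the constructor, += and value), and the usual way of creating accumulators through SparkContext is not part of this page:

    // Sums Doubles; follows the addInPlace/zero contract shown above.
    @serializable object DoubleAccumParam extends spark.AccumulatorParam[Double] {
      def addInPlace(t1: Double, t2: Double): Double = t1 + t2
      def zero(initialValue: Double): Double = 0.0
    }

    object AccumulatorDemo {
      def main(args: Array[String]) {
        val acc = new spark.Accumulator(0.0, DoubleAccumParam)
        acc += 3.5   // delegates to DoubleAccumParam.addInPlace
        acc += 1.5
        println("acc.value = " + acc.value)   // prints 5.0 on the master
      }
    }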