Commit e338be4

Merge branch 'hive0.9' of https://github.com/amplab/shark into alpha-0.2.0

2 parents 3a644b4 + 314a90f
sameeragarwal committed Feb 2, 2014

Showing 139 changed files with 9,587 additions and 2,205 deletions.
40 changes: 40 additions & 0 deletions bin/dev/release_cleanup.sh
@@ -0,0 +1,40 @@
#!/bin/sh

# Copyright (C) 2012 The Regents of The University of California.
# All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

DEVDIR="`dirname $0`"
BINDIR="`dirname $DEVDIR`"
FWDIR="`dirname $BINDIR`"

rm -rf $FWDIR/run-tests-from-scratch-workspace
rm -rf $FWDIR/test_warehouses

rm -rf $FWDIR/conf/shark-env.sh

rm -rf $FWDIR/metastore_db
rm -rf $FWDIR/derby.log

rm -rf $FWDIR/project/target $FWDIR/project/project/target

rm -rf $FWDIR/target/resolution-cache
rm -rf $FWDIR/target/streams
rm -rf $FWDIR/target/scala-*/cache
rm -rf $FWDIR/target/scala-*/classes
rm -rf $FWDIR/target/scala-*/test-classes

find $FWDIR -name ".DS_Store" -exec rm {} \;
find $FWDIR -name ".history" -exec rm {} \;

22 changes: 12 additions & 10 deletions bin/dev/run-tests-from-scratch
@@ -12,10 +12,11 @@
# Set up config vars using env vars or defaults; parse cmd line flags.
#####################################################################
SHARK_PROJ_DIR_DEFAULT="$(cd `dirname $0`/../../; pwd)"
+SBT_OPTS_DEFAULT="-Xms512M -Xmx2048M -Xss1M -XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=256m -XX:+UseCodeCacheFlushing"
SPARK_MEM_DEFAULT=4g
SHARK_MASTER_MEM_DEFAULT=4g
SPARK_KV_JAVA_OPTS_DEFAULT=("-Dspark.local.dir=/tmp " "-Dspark.kryoserializer.buffer.mb=10 ")
SPARK_GIT_URL_DEFAULT="https://github.com/mesos/spark.git"
SPARK_GIT_URL_DEFAULT="https://github.com/apache/incubator-spark.git spark"
HIVE_GIT_URL_DEFAULT="https://github.com/amplab/hive.git -b shark-0.9"
SPARK_HADOOP_VERSION_DEFAULT="1.0.4"
SPARK_WITH_YARN_DEFAULT=false
@@ -49,6 +50,10 @@ else
fi
fi

if [ "x$SBT_OPTS" == "x" ] ; then
SBT_OPTS=$SBT_OPTS_DEFAULT
fi

if [ "x$SPARK_MEM" == "x" ] ; then
export SPARK_MEM=$SPARK_MEM_DEFAULT
fi
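
Since SBT_OPTS now falls back to SBT_OPTS_DEFAULT only when unset, the sbt JVM settings can still be tuned per invocation; for example (heap values illustrative):

SBT_OPTS="-Xms512M -Xmx4096M -XX:MaxPermSize=1g" ./bin/dev/run-tests-from-scratch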
@@ -117,6 +122,7 @@ Required Options:
Optional configuration environment variables:
SHARK_PROJ_DIR (default: "$SHARK_PROJ_DIR_DEFAULT")
SCALA_HOME (default: Scala version ${SCALA_VERSION} will be downloaded and used)
+SBT_OPTS (default: "$SBT_OPTS_DEFAULT")
SPARK_MEM (default: $SPARK_MEM_DEFAULT)
SHARK_MASTER_MEM (default: $SHARK_MASTER_MEM_DEFAULT)
SPARK_JAVA_OPTS (default: "${SPARK_KV_JAVA_OPTS_DEFAULT[@]}")
@@ -226,6 +232,7 @@ fi
# Download Scala if SCALA_HOME is not specified.
####################################################################
if [ "x$SCALA_HOME" == "x" ] ; then
+  rm -rf ./scala*tgz
wget $SCALA_DOWNLOAD_PATH
tar xvfz scala*tgz
export SCALA_HOME="$WORKSPACE/scala-$SCALA_VERSION"
@@ -251,7 +258,8 @@ else
export SPARK_HADOOP_VERSION=$SPARK_HADOOP_VERSION
export SPARK_WITH_YARN=$SPARK_WITH_YARN
# Build spark and push the jars to local Ivy/Maven caches.
-  sbt/sbt clean publish-local
+  wget -nc http://typesafe.artifactoryonline.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/0.13.0/sbt-launch.jar
+  java $SBT_OPTS -jar sbt-launch.jar clean publish-local
popd
fi
export SPARK_HOME="$WORKSPACE/spark"
@@ -274,17 +282,11 @@ export HADOOP_HOME="$WORKSPACE/hadoop-${SPARK_HADOOP_VERSION}"
# Download and build Hive.
#####################################################################
if $SKIP_HIVE ; then
if [ ! -e "hive" -o ! -e "hive-warehouse" ] ; then
echo "hive and hive-warehouse dirs must exist when skipping Hive download and build stage."
if [ ! -e "hive" ] ; then
echo "hive dir must exist when skipping Hive download and build stage."
exit -1
fi
else
-  # Setup the Hive warehouse directory.
-  HIVE_WAREHOUSE=./hive-warehouse
-  rm -rf $HIVE_WAREHOUSE
-  mkdir -p $HIVE_WAREHOUSE
-  chmod 0777 $HIVE_WAREHOUSE

rm -rf hive
git clone $HIVE_GIT_URL
pushd hive
4 changes: 0 additions & 4 deletions bin/ext/sharkserver.sh
@@ -18,10 +18,6 @@
THISSERVICE=sharkserver
export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "

-# Use Java to launch Shark otherwise the unit tests cannot properly kill
-# the server process.
-export SHARK_LAUNCH_WITH_JAVA=1

sharkserver() {
echo "Starting the Shark Server"
exec $FWDIR/run shark.SharkServer "$@"
5 changes: 5 additions & 0 deletions conf/blinkdb-env.sh.template
@@ -39,6 +39,11 @@ export HIVE_HOME=""
# Only required if using Mesos:
#export MESOS_NATIVE_LIBRARY=/usr/local/lib/libmesos.so

+# Only required if running Shark with Spark on YARN
+#export SHARK_EXEC_MODE=yarn
+#export SPARK_ASSEMBLY_JAR=
+#export SHARK_ASSEMBLY_JAR=

# (Optional) Extra classpath
#export SPARK_LIBRARY_PATH=""
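
Filled in, the new YARN block above might look like the following (paths and version strings are illustrative, not part of the template):

export SHARK_EXEC_MODE=yarn
export SPARK_ASSEMBLY_JAR=/opt/spark/assembly/target/scala-2.10/spark-assembly-0.9.0-incubating-hadoop1.0.4.jar
export SHARK_ASSEMBLY_JAR=/opt/shark/target/scala-2.10/shark-assembly-0.9.0-hive0.9-SNAPSHOT-hadoop1.0.4.jar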

43 changes: 29 additions & 14 deletions project/SharkBuild.scala
@@ -21,23 +21,32 @@ import Keys._
import sbtassembly.Plugin._
import AssemblyKeys._

+import scala.util.Properties.{ envOrNone => env }

object SharkBuild extends Build {

val BLINKDB_VERSION = "0.1.0-SNAPSHOT"

// Shark version
val SHARK_VERSION = "0.8.0-SNAPSHOT"
val SHARK_VERSION = "0.9.0-hive0.9-SNAPSHOT"

val SPARK_VERSION = "0.8.0-SNAPSHOT"
val SPARK_VERSION = "0.9.0-incubating"

val SCALA_VERSION = "2.9.3"
val SCALA_VERSION = "2.10.3"

// Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or
// "1.0.1" for Apache releases, or "0.20.2-cdh3u3" for Cloudera Hadoop.
-  val HADOOP_VERSION = "1.0.4"
+  val DEFAULT_HADOOP_VERSION = "1.0.4"

+  lazy val hadoopVersion = env("SHARK_HADOOP_VERSION") orElse
+    env("SPARK_HADOOP_VERSION") getOrElse
+    DEFAULT_HADOOP_VERSION
+
+  // Whether to build Shark with Yarn support
+  val YARN_ENABLED = env("SHARK_YARN").getOrElse("false").toBoolean

// Whether to build Shark with Tachyon jar.
-  val TACHYON_ENABLED = false
+  val TACHYON_ENABLED = true

lazy val root = Project(
id = "root",
@@ -47,29 +56,34 @@ object SharkBuild extends Build {
val excludeKyro = ExclusionRule(organization = "de.javakaffee")
val excludeHadoop = ExclusionRule(organization = "org.apache.hadoop")
val excludeNetty = ExclusionRule(organization = "org.jboss.netty")
+  val excludeCurator = ExclusionRule(organization = "org.apache.curator")
+  val excludeJackson = ExclusionRule(organization = "org.codehaus.jackson")
+  val excludeAsm = ExclusionRule(organization = "asm")
+  val excludeSnappy = ExclusionRule(organization = "org.xerial.snappy")

def coreSettings = Defaults.defaultSettings ++ Seq(

name := "shark",
organization := "edu.berkeley.cs.amplab",
version := SHARK_VERSION,
scalaVersion := SCALA_VERSION,
scalacOptions := Seq("-deprecation", "-unchecked", "-optimize"),
scalacOptions := Seq("-deprecation", "-unchecked", "-optimize", "-feature", "-Yinline-warnings"),
parallelExecution in Test := false,

// Download managed jars into lib_managed.
retrieveManaged := true,
resolvers ++= Seq(
"Typesafe Repository" at "http://repo.typesafe.com/typesafe/releases/",
"JBoss Repository" at "http://repository.jboss.org/nexus/content/repositories/releases/",
"Spray Repository" at "http://repo.spray.cc/",
"Cloudera Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos/",
"Local Maven" at Path.userHome.asFile.toURI.toURL + ".m2/repository"
),

fork := true,
javaOptions += "-XX:MaxPermSize=512m",
javaOptions += "-Xmx2g",
javaOptions += "-Dsun.io.serialization.extendedDebugInfo=true",

testOptions in Test += Tests.Argument("-oF"), // Full stack trace on test failures

testListeners <<= target.map(
t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))),
@@ -102,7 +116,7 @@ object SharkBuild extends Build {
"org.apache.spark" %% "spark-core" % SPARK_VERSION,
"org.apache.spark" %% "spark-repl" % SPARK_VERSION,
"com.google.guava" % "guava" % "14.0.1",
"org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll(excludeNetty),
"org.apache.hadoop" % "hadoop-client" % hadoopVersion excludeAll(excludeJackson, excludeNetty, excludeAsm) force(),
// See https://code.google.com/p/guava-libraries/issues/detail?id=1095
"com.google.code.findbugs" % "jsr305" % "1.3.+",

@@ -114,21 +128,22 @@ object SharkBuild extends Build {
// Test infrastructure
"org.scalatest" %% "scalatest" % "1.9.1" % "test",
"junit" % "junit" % "4.10" % "test",
"net.java.dev.jets3t" % "jets3t" % "0.9.0",
"net.java.dev.jets3t" % "jets3t" % "0.7.1",
"com.novocode" % "junit-interface" % "0.8" % "test") ++
-    (if (TACHYON_ENABLED) Some("org.tachyonproject" % "tachyon" % "0.3.0-SNAPSHOT" excludeAll(excludeKyro, excludeHadoop) ) else None).toSeq
-  )
+    (if (YARN_ENABLED) Some("org.apache.spark" %% "spark-yarn" % SPARK_VERSION) else None).toSeq ++
+    (if (TACHYON_ENABLED) Some("org.tachyonproject" % "tachyon" % "0.3.0" excludeAll(excludeKyro, excludeHadoop, excludeCurator, excludeJackson, excludeNetty, excludeAsm)) else None).toSeq
+  ) ++ org.scalastyle.sbt.ScalastylePlugin.Settings

def assemblyProjSettings = Seq(
name := "shark-assembly",
-    jarName in assembly <<= version map { v => "shark-assembly-" + v + "-hadoop" + HADOOP_VERSION + ".jar" }
+    jarName in assembly <<= version map { v => "shark-assembly-" + v + "-hadoop" + hadoopVersion + ".jar" }
) ++ assemblySettings ++ extraAssemblySettings

def extraAssemblySettings() = Seq(
test in assembly := {},
mergeStrategy in assembly := {
case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard
case m if m.toLowerCase.matches("meta-inf.*\\.sf$") => MergeStrategy.discard
case "META-INF/services/org.apache.hadoop.fs.FileSystem" => MergeStrategy.concat
case "reference.conf" => MergeStrategy.concat
case _ => MergeStrategy.first
}
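
Because hadoopVersion and YARN_ENABLED now read from the environment, the Hadoop profile becomes a build-time switch; a sketch of the intended usage (version strings illustrative):

# Build against a specific Hadoop release
SHARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt/sbt clean package
# Build with YARN support, which adds the spark-yarn dependency
SHARK_YARN=true SHARK_HADOOP_VERSION=2.2.0 sbt/sbt assembly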
4 changes: 4 additions & 0 deletions project/plugins.sbt
@@ -15,6 +15,8 @@

addSbtPlugin("org.ensime" % "ensime-sbt-cmd" % "0.1.1")

addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.3.2")

addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.2.0")

addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.4.0")
@@ -24,3 +26,5 @@ addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.9.2")
resolvers += Resolver.url(
"sbt-plugin-releases",
new URL("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases/"))(Resolver.ivyStylePatterns)

resolvers += "sonatype-releases" at "https://oss.sonatype.org/content/repositories/releases/"
53 changes: 34 additions & 19 deletions run
@@ -1,9 +1,9 @@
#!/bin/bash

# This file is used to launch Shark on the master.
-export SCALA_VERSION=2.9.3
-SHARK_VERSION=0.8.0-SNAPSHOT
-BLINKDB_VERSION=0.1.0-SNAPSHOT
+export SCALA_VERSION=2.10
+SHARK_VERSION=0.9.0-SNAPSHOT
+BLINKDB_VERSION=0.2.0-SNAPSHOT

# Figure out where the framework is installed
FWDIR="$(cd `dirname $0`; pwd)"
@@ -48,6 +48,26 @@ if [ -n "$MASTER" ] ; then
fi
fi

+# Check for Shark-with-Spark-on-YARN params
+if [ "x$SHARK_EXEC_MODE" == "xyarn" ] ; then
+  if [ "x$SPARK_ASSEMBLY_JAR" == "x" ] ; then
+    echo "No SPARK_ASSEMBLY_JAR specified. Please set SPARK_ASSEMBLY_JAR for Spark on YARN mode."
+    exit 1
+  else
+    export SPARK_JAR=$SPARK_ASSEMBLY_JAR
+  fi
+
+  if [ "x$SHARK_ASSEMBLY_JAR" == "x" ] ; then
+    echo "No SHARK_ASSEMBLY_JAR specified. Please set SHARK_ASSEMBLY_JAR for Spark on YARN mode."
+    exit 1
+  else
+    export SPARK_YARN_APP_JAR=$SHARK_ASSEMBLY_JAR
+  fi
+
+  # Use yarn-client mode for the interactive shell.
+  export MASTER=yarn-client
+fi

# Check for optionally specified configuration file path
if [ "x$HIVE_CONF_DIR" == "x" ] ; then
HIVE_CONF_DIR="$HIVE_HOME/conf"
@@ -110,9 +130,10 @@ SPARK_CLASSPATH+=":$SHARK_HOME/target/scala-$SCALA_VERSION/test-classes"


if [ "x$HADOOP_HOME" == "x" ] ; then
echo "No HADOOP_HOME specified. Shark will run in local-mode"
echo "No HADOOP_HOME specified. Shark will run in local-mode"
else
SPARK_CLASSPATH+=:$HADOOP_HOME/conf
SPARK_CLASSPATH+=:$HADOOP_HOME/etc/hadoop
SPARK_CLASSPATH+=:$HADOOP_HOME/conf
fi


@@ -141,22 +162,16 @@ export JAVA_OPTS
export ANT_OPTS=$JAVA_OPTS

if [ "x$RUNNER" == "x" ] ; then
if [ "$SHARK_LAUNCH_WITH_JAVA" == "1" ]; then
CLASSPATH+=":$SCALA_HOME/lib/scala-library.jar"
CLASSPATH+=":$SCALA_HOME/lib/scala-compiler.jar"
CLASSPATH+=":$SCALA_HOME/lib/jline.jar"
if [ -n "$JAVA_HOME" ]; then
RUNNER="${JAVA_HOME}/bin/java"
else
RUNNER=java
fi
# The JVM doesn't read JAVA_OPTS by default so we need to pass it in
EXTRA_ARGS="$JAVA_OPTS"
CLASSPATH+=":$SCALA_HOME/lib/scala-library.jar"
CLASSPATH+=":$SCALA_HOME/lib/scala-compiler.jar"
CLASSPATH+=":$SCALA_HOME/lib/jline.jar"
if [ -n "$JAVA_HOME" ]; then
RUNNER="${JAVA_HOME}/bin/java"
else
SCALA=${SCALA_HOME}/bin/scala
RUNNER="$SCALA -cp \"$CLASSPATH\""
EXTRA_ARGS=""
RUNNER=java
fi
# The JVM doesn't read JAVA_OPTS by default so we need to pass it in
EXTRA_ARGS="$JAVA_OPTS"
fi

exec $RUNNER $EXTRA_ARGS "$@"
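
Taken together with the YARN check above, a launch in yarn-client mode would look roughly like this (all paths hypothetical); run exports MASTER=yarn-client and hands the named class to the JVM:

export HADOOP_HOME=/opt/hadoop-2.2.0
export SHARK_EXEC_MODE=yarn
export SPARK_ASSEMBLY_JAR=/opt/spark/spark-assembly-0.9.0-incubating-hadoop2.2.0.jar
export SHARK_ASSEMBLY_JAR=/opt/shark/shark-assembly-0.9.0-SNAPSHOT-hadoop2.2.0.jar
./run shark.SharkCliDriver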
6 changes: 3 additions & 3 deletions sbt/sbt
@@ -5,9 +5,9 @@ if [ -e $BLINKDB_CONF_DIR/blinkdb-env.sh ] ; then
. $BLINKDB_CONF_DIR/blinkdb-env.sh
fi

if [[ "$@" == *"test"* ]]; then
if [ "x$HIVE_DEV_HOME" == "x" ]; then
echo "No HIVE_DEV_HOME specified. Required for tests. Please set HIVE_DEV_HOME."
if [[ "$@" == *"test"* ]] || [[ "$@" == "eclipse" ]]; then
if [[ "x$HIVE_DEV_HOME" == "x" ]]; then
echo "No HIVE_DEV_HOME specified. Required for tests and eclipse. Please set HIVE_DEV_HOME."
exit 1
fi
fi
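
Both guarded invocations then require the variable up front (path illustrative):

HIVE_DEV_HOME=/path/to/amplab-hive sbt/sbt test
HIVE_DEV_HOME=/path/to/amplab-hive sbt/sbt eclipse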
