added documentation builds as a top level test component, altered high level project changes to properly execute core tests only when necessary, changed variable names for simplicity
Brennon York committed Jun 15, 2015
1 parent ec1ae78 commit aa03d9e
Showing 4 changed files with 56 additions and 29 deletions.
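
For orientation, the reordered flow in dev/run-tests.py boils down to the condensed sketch below (paraphrased from the main() hunk further down, not verbatim project code): style checks still run first, then the changed modules are identified, documentation is built only when docs/ changed under Jenkins, and the test suites are scoped to the changed modules.

    # condensed sketch of the reordered pipeline; names are taken from the diff below
    changed_modules = identify_changed_modules(test_env)

    if "DOCS" in changed_modules and test_env == "amplab_jenkins":
        build_spark_documentation()

    build_apache_spark(build_tool, hadoop_version, changed_modules)
    detect_binary_inop_with_mima()

    run_scala_tests(build_tool, hadoop_version, changed_modules)
    run_python_tests()
    run_sparkr_tests()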
11 changes: 6 additions & 5 deletions dev/run-tests-codes.sh
@@ -21,8 +21,9 @@ readonly BLOCK_GENERAL=10
 readonly BLOCK_RAT=11
 readonly BLOCK_SCALA_STYLE=12
 readonly BLOCK_PYTHON_STYLE=13
-readonly BLOCK_BUILD=14
-readonly BLOCK_MIMA=15
-readonly BLOCK_SPARK_UNIT_TESTS=16
-readonly BLOCK_PYSPARK_UNIT_TESTS=17
-readonly BLOCK_SPARKR_UNIT_TESTS=18
+readonly BLOCK_DOCUMENTATION=14
+readonly BLOCK_BUILD=15
+readonly BLOCK_MIMA=16
+readonly BLOCK_SPARK_UNIT_TESTS=17
+readonly BLOCK_PYSPARK_UNIT_TESTS=18
+readonly BLOCK_SPARKR_UNIT_TESTS=19
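
For reference, the renumbered exit codes read back as follows; this is a minimal Python sketch (the dictionary and helper name are illustrative, not project code), while the real lookup is the bash elif chain in dev/run-tests-jenkins below.

    # Illustrative mirror of the codes above; values taken from this diff.
    BLOCK_NAMES = {
        10: "BLOCK_GENERAL",
        11: "BLOCK_RAT",
        12: "BLOCK_SCALA_STYLE",
        13: "BLOCK_PYTHON_STYLE",
        14: "BLOCK_DOCUMENTATION",
        15: "BLOCK_BUILD",
        16: "BLOCK_MIMA",
        17: "BLOCK_SPARK_UNIT_TESTS",
        18: "BLOCK_PYSPARK_UNIT_TESTS",
        19: "BLOCK_SPARKR_UNIT_TESTS",
    }

    def failing_block(exit_status):
        """Translate a run-tests exit status into the name of the block that failed."""
        return BLOCK_NAMES.get(exit_status, "unknown")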
2 changes: 2 additions & 0 deletions dev/run-tests-jenkins
@@ -210,6 +210,8 @@ done
   failing_test="Scala style tests"
 elif [ "$test_result" -eq "$BLOCK_PYTHON_STYLE" ]; then
   failing_test="Python style tests"
+elif [ "$test_result" -eq "$BLOCK_DOCUMENTATION" ]; then
+  failing_test="to generate documentation"
 elif [ "$test_result" -eq "$BLOCK_BUILD" ]; then
   failing_test="to build"
 elif [ "$test_result" -eq "$BLOCK_MIMA" ]; then
71 changes: 47 additions & 24 deletions dev/run-tests.py
@@ -159,6 +159,11 @@ def run_python_style_checks():
     run_cmd(["./dev/lint-python"])
 
 
+def build_spark_documentation():
+    set_title_and_block("Building Spark Documentation", "BLOCK_DOCUMENTATION")
+    os.environ["PRODUCTION"] = "1 jekyll build"
+
+
 def exec_maven(mvn_args=[]):
     """Will call Maven in the current directory with the list of mvn_args passed
     in and returns the subprocess for any further processing"""
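
As committed here, build_spark_documentation() only sets the Jenkins block and exports the PRODUCTION environment variable; it does not yet invoke Jekyll. A plausible completion might look like the sketch below, purely an assumption for illustration (the SPARK_HOME lookup, the "1" flag value, and the jekyll invocation are not part of this commit), reusing run_cmd() from earlier in this script.

    import os

    def build_spark_documentation_sketch():
        spark_home = os.environ.get("SPARK_HOME", os.getcwd())   # assumed
        os.environ["PRODUCTION"] = "1"                            # assumed flag value
        os.chdir(os.path.join(spark_home, "docs"))
        run_cmd(["jekyll", "build"])   # run_cmd() is defined earlier in run-tests.py
        os.chdir(spark_home)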
@@ -215,21 +220,26 @@ def get_hadoop_profiles(hadoop_version):
 
 def get_build_profiles(hadoop_version="hadoop2.3",
                        enable_base_profiles=True,
-                       enable_hive_profiles=False):
+                       enable_hive_profiles=False,
+                       enable_doc_profiles=False):
     """Returns a list of hadoop profiles to be used as looked up from the passed in hadoop profile
     key with the option of adding on the base and hive profiles."""
 
     base_profiles = ["-Pkinesis-asl"]
     hive_profiles = ["-Phive", "-Phive-thriftserver"]
+    doc_profiles = []
     hadoop_profiles = get_hadoop_profiles(hadoop_version)
 
     build_profiles = hadoop_profiles
 
     if enable_base_profiles:
-        build_profiles = build_profiles + base_profiles
+        build_profiles += base_profiles
 
     if enable_hive_profiles:
-        build_profiles = build_profiles + hive_profiles
+        build_profiles += hive_profiles
+
+    if enable_doc_profiles:
+        build_profiles += doc_profiles
 
     return build_profiles
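
A quick usage sketch of the new flag follows; note that doc_profiles is an empty list in this commit, so enable_doc_profiles adds nothing yet, and the hadoop flags in the comment are assumed for illustration since get_hadoop_profiles() is not shown in this hunk.

    profiles = get_build_profiles("hadoop2.3",
                                  enable_base_profiles=True,
                                  enable_hive_profiles=True,
                                  enable_doc_profiles=True)
    # Assuming get_hadoop_profiles("hadoop2.3") yields ["-Pyarn", "-Phadoop-2.3"],
    # this composes to:
    # ["-Pyarn", "-Phadoop-2.3", "-Pkinesis-asl", "-Phive", "-Phive-thriftserver"]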

@@ -259,7 +269,7 @@ def build_spark_sbt(hadoop_version):
     exec_sbt(profiles_and_goals)
 
 
-def build_apache_spark(build_tool, hadoop_version):
+def build_apache_spark(build_tool, hadoop_version, changed_modules):
     """Will build Spark against Hive v0.13.1 given the passed in build tool (either `sbt` or
     `maven`). Defaults to using `sbt`."""

@@ -284,7 +294,7 @@ def identify_changed_modules(test_env):
     If run under the `amplab_jenkins` environment will determine the changed files
     as compared to the `ghprbTargetBranch` and execute the necessary set of tests
     to provide coverage for the changed code."""
-    test_suite = set()
+    changed_modules = set()
 
     if test_env == "amplab_jenkins":
         target_branch = os.environ["ghprbTargetBranch"]
@@ -295,7 +305,6 @@ def identify_changed_modules(test_env):
         # remove any empty strings
         changed_files = [f for f in raw_output.split('\n') if f]
 
-        # find any sql files
         sql_files = [f for f in changed_files
                      if any(f.startswith(p) for p in
                             ["sql/",
@@ -322,31 +331,39 @@ def identify_changed_modules(test_env):
                         if any(f.startswith(p) for p in
                                ["examples/src/main/scala/org/apache/spark/examples/graphx/",
                                 "graphx/"])]
 
-        non_sql_files = set(changed_files).difference(set(sql_files))
-
-        if non_sql_files:
-            test_suite.add("CORE")
+        doc_files = [f for f in changed_files if f.startswith("docs/")]
+
+        # union together all changed top level project files
+        top_level_project_files = set().union([set(f) for f in [sql_files,
+                                                                mllib_files,
+                                                                streaming_files,
+                                                                graphx_files,
+                                                                doc_files]])
+        changed_core_files = set(changed_files).difference(top_level_project_files)
+
+        if changed_core_files:
+            changed_modules.add("CORE")
         if sql_files:
             print "[info] Detected changes in SQL. Will run Hive test suite."
-            test_suite.add("SQL")
-            if not non_sql_files:
-                print "[info] Detected no changes except in SQL. Will only run SQL tests."
+            changed_modules.add("SQL")
         if mllib_files:
             print "[info] Detected changes in MLlib. Will run MLlib test suite."
-            test_suite.add("MLLIB")
+            changed_modules.add("MLLIB")
         if streaming_files:
             print "[info] Detected changes in Streaming. Will run Streaming test suite."
-            test_suite.add("STREAMING")
+            changed_modules.add("STREAMING")
         if graphx_files:
             print "[info] Detected changes in GraphX. Will run GraphX test suite."
-            test_suite.add("GRAPHX")
+            changed_modules.add("GRAPHX")
+        if doc_files:
+            print "[info] Detected changes in documentation. Will build spark with documentation."
+            changed_modules.add("DOCS")
 
-        return test_suite
+        return changed_modules
     else:
         # we aren't in the Amplab environment so simply run all tests
-        test_suite.add("ALL")
-        return test_suite
+        changed_modules.add("ALL")
+        return changed_modules


def run_scala_tests_maven(test_profiles):
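
To make the new classification concrete, here is a worked example with a hypothetical set of changed files; the file names are invented purely for illustration.

    changed_files = ["docs/configuration.md",              # docs/ prefix -> "DOCS"
                     "sql/core/src/main/scala/Foo.scala",  # sql/ prefix  -> "SQL"
                     "core/src/main/scala/Bar.scala"]      # owned by no top level project -> "CORE"
    # identify_changed_modules() would return {"DOCS", "SQL", "CORE"} on Jenkins:
    # the documentation gets built, and the SQL (Hive) and core test suites both run.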
@@ -482,15 +499,21 @@ def main():
     run_scala_style_checks()
     run_python_style_checks()
 
+    # determine high level changes
+    changed_modules = identify_changed_modules(test_env)
+
+    # determine if docs were changed and if we're inside the amplab environment
+    if "DOCS" in changed_modules and test_env == "amplab_jenkins":
+        build_spark_documentation()
+
     # spark build
-    build_apache_spark(build_tool, hadoop_version)
+    build_apache_spark(build_tool, hadoop_version, changed_modules)
 
     # backwards compatibility checks
     detect_binary_inop_with_mima()
 
-    # test suites
-    test_modules = identify_changed_modules(test_env)
-    run_scala_tests(build_tool, hadoop_version, test_modules)
+    # run the test suites
+    run_scala_tests(build_tool, hadoop_version, changed_modules)
     run_python_tests()
     run_sparkr_tests()

1 change: 1 addition & 0 deletions docs/configuration.md
@@ -8,6 +8,7 @@ title: Configuration
 
 Spark provides three locations to configure the system:
 
+* ADDING DOC CHANGE FOR TESTING
 * [Spark properties](#spark-properties) control most application parameters and can be set by using
   a [SparkConf](api/scala/index.html#org.apache.spark.SparkConf) object, or through Java
   system properties.
