Skip to content

Commit

Permalink
Merge branch 'master' of github.com:apache/spark
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewor14 committed Mar 24, 2014
2 parents d5154da + 21109fb commit a9eae7e
Show file tree
Hide file tree
Showing 126 changed files with 2,876 additions and 926 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ dist/
spark-*-bin.tar.gz
unit-tests.log
/lib/
rat-results.txt
39 changes: 39 additions & 0 deletions .rat-excludes
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
target
.gitignore
.project
.classpath
.rat-excludes
.*md
derby.log
TAGS
RELEASE
control
docs
fairscheduler.xml.template
log4j.properties
log4j.properties.template
metrics.properties.template
slaves
spark-env.sh
spark-env.sh.template
log4j-defaults.properties
sorttable.js
.*txt
.*data
.*log
cloudpickle.py
join.py
SparkExprTyper.scala
SparkILoop.scala
SparkILoopInit.scala
SparkIMain.scala
SparkImports.scala
SparkJLineCompletion.scala
SparkJLineReader.scala
SparkMemberHandlers.scala
sbt
sbt-launch-lib.bash
plugins.sbt
work
.*\.q
golden
9 changes: 9 additions & 0 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,12 @@ Copyright 2014 The Apache Software Foundation.

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

In addition, this product includes:

- JUnit (http://www.junit.org) is a testing framework for Java. We included it
under the terms of the Eclipse Public License v1.0.

- JTransforms (https://sites.google.com/site/piotrwendykier/software/jtransforms)
provides fast transforms in Java. It is tri-licensed, and we included it under
the terms of the Mozilla Public License v1.1.
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ private[ui] class StageTable(stages: Seq[StageInfo], parent: JobProgressUI) {
val startWidth = "width: %s%%".format((started.toDouble/total)*100)

<div class="progress">
<span style="text-align:center; position:absolute; width:100%;">
<span style="text-align:center; position:absolute; width:100%; left:0;">
{completed}/{total} {failed}
</span>
<div class="bar bar-completed" style={completeWidth}></div>
Expand Down
81 changes: 81 additions & 0 deletions dev/check-license
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#


# Ensure the Apache RAT jar is present at $rat_jar, downloading it if needed.
#
# Reads:
#   rat_jar      - destination path of the jar
#   RAT_VERSION  - RAT release to fetch from Maven Central
# Sets (as before): URL1, URL2, JAR, and JAR_DL when a download is attempted.
# Returns:
#   0 if the jar exists when the function finishes,
#   1 if neither curl nor wget is available or both download URLs failed.
#
# Failures are reported with `return 1` instead of `exit -1`: -1 is not a
# valid exit status, and exiting here would bypass the caller's own
# `acquire_rat_jar || { ... }` error handling.
acquire_rat_jar () {

  URL1="http://search.maven.org/remotecontent?filepath=org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar"
  URL2="http://repo1.maven.org/maven2/org/apache/rat/apache-rat/${RAT_VERSION}/apache-rat-${RAT_VERSION}.jar"

  JAR="$rat_jar"

  # Nothing to do when the jar has already been downloaded.
  if [[ -f "$JAR" ]]; then
    return 0
  fi

  printf "Attempting to fetch rat\n"
  # Download to a temporary name so a partial file is never mistaken for the jar.
  JAR_DL="${JAR}.part"
  if command -v curl >/dev/null 2>&1; then
    # --fail:     HTTP errors count as failures, so an error page is never saved as the jar
    # --location: follow redirects issued by the repository front-end
    { curl --fail --location --progress-bar "$URL1" > "$JAR_DL" \
        || curl --fail --location --progress-bar "$URL2" > "$JAR_DL"; } \
      && mv "$JAR_DL" "$JAR"
  elif command -v wget >/dev/null 2>&1; then
    { wget --progress=bar "$URL1" -O "$JAR_DL" \
        || wget --progress=bar "$URL2" -O "$JAR_DL"; } \
      && mv "$JAR_DL" "$JAR"
  else
    printf "You do not have curl or wget installed, please install rat manually.\n"
    return 1
  fi

  if [[ ! -f "$JAR" ]]; then
    # We failed to download; do not leave a truncated .part file behind.
    rm -f "$JAR_DL"
    printf 'Our attempt to download rat locally to %s failed. Please install rat manually.\n' "$JAR"
    return 1
  fi

  printf 'Launching rat from %s\n' "$JAR"
}

# Main flow: run Apache RAT over the source tree and fail if any file is
# reported without a recognised license header.

# Go to the Spark project root directory (the parent of this script's directory).
# Quoting keeps this working when the checkout path contains spaces.
FWDIR="$(cd "$(dirname "$0")/.." && pwd)" || exit 1
cd "$FWDIR" || exit 1

# Prefer the JVM from JAVA_HOME when it is executable; otherwise rely on PATH.
if test -x "$JAVA_HOME/bin/java"; then
  declare java_cmd="$JAVA_HOME/bin/java"
else
  declare java_cmd=java
fi

export RAT_VERSION=0.10
export rat_jar="$FWDIR/lib/apache-rat-${RAT_VERSION}.jar"
mkdir -p "$FWDIR/lib"

# Fetch the RAT jar on first use.
[[ -f "$rat_jar" ]] || acquire_rat_jar || {
  echo "Download failed. Obtain the rat jar manually and place it at $rat_jar"
  exit 1
}

# Write the report to rat-results.txt in the project root (the current directory).
"$java_cmd" -jar "$rat_jar" -E "$FWDIR/.rat-excludes" -d "$FWDIR" > rat-results.txt

# If Java is missing or RAT crashed, the report is empty or incomplete; without
# this check an empty report would be misread as "no violations".
if [ $? -ne 0 ]; then
  echo "RAT exited abnormally"
  exit 1
fi

# The script treats report lines containing "??" as files missing a license header.
ERRORS="$(grep -e "??" rat-results.txt)"

if test -n "$ERRORS"; then
  echo "Could not find Apache license headers in the following files:"
  echo "$ERRORS"
  exit 1
else
  echo "RAT checks passed."
fi
7 changes: 6 additions & 1 deletion dev/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,13 @@ else
fi

JAVA_VERSION=$($java_cmd -version 2>&1 | sed 's/java version "\(.*\)\.\(.*\)\..*"/\1\2/; 1q')
[ "$JAVA_VERSION" -ge 18 ] && echo "" || echo "[Warn] Java 8 tests will not run, because JDK version is < 1.8."
[ "$JAVA_VERSION" -ge 18 ] && echo "" || echo "[Warn] Java 8 tests will not run because JDK version is < 1.8."

echo "========================================================================="
echo "Running Apache RAT checks"
echo "========================================================================="

dev/check-license

echo "========================================================================="
echo "Running Scala style checks"
Expand Down
138 changes: 0 additions & 138 deletions examples/src/main/java/org/apache/spark/examples/JavaKMeans.java

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,33 @@

package org.apache.spark.mllib.examples;

import java.util.regex.Pattern;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

import org.apache.spark.mllib.clustering.KMeans;
import org.apache.spark.mllib.clustering.KMeansModel;

import java.util.Arrays;
import java.util.regex.Pattern;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;

/**
* Example using MLlib KMeans from Java.
*/
public final class JavaKMeans {

static class ParsePoint implements Function<String, double[]> {
private static class ParsePoint implements Function<String, Vector> {
private static final Pattern SPACE = Pattern.compile(" ");

@Override
public double[] call(String line) {
public Vector call(String line) {
String[] tok = SPACE.split(line);
double[] point = new double[tok.length];
for (int i = 0; i < tok.length; ++i) {
point[i] = Double.parseDouble(tok[i]);
}
return point;
return Vectors.dense(point);
}
}

Expand All @@ -65,15 +66,15 @@ public static void main(String[] args) {

JavaSparkContext sc = new JavaSparkContext(args[0], "JavaKMeans",
System.getenv("SPARK_HOME"), JavaSparkContext.jarOfClass(JavaKMeans.class));
JavaRDD<String> lines = sc.textFile(args[1]);
JavaRDD<String> lines = sc.textFile(inputFile);

JavaRDD<double[]> points = lines.map(new ParsePoint());
JavaRDD<Vector> points = lines.map(new ParsePoint());

KMeansModel model = KMeans.train(points.rdd(), k, iterations, runs);
KMeansModel model = KMeans.train(points.rdd(), k, iterations, runs, KMeans.K_MEANS_PARALLEL());

System.out.println("Cluster centers:");
for (double[] center : model.clusterCenters()) {
System.out.println(" " + Arrays.toString(center));
for (Vector center : model.clusterCenters()) {
System.out.println(" " + center);
}
double cost = model.computeCost(points.rdd());
System.out.println("Cost: " + cost);
Expand Down
5 changes: 5 additions & 0 deletions mllib/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@
<artifactId>jblas</artifactId>
<version>1.2.3</version>
</dependency>
<dependency>
<groupId>org.scalanlp</groupId>
<artifactId>breeze_${scala.binary.version}</artifactId>
<version>0.7</version>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.binary.version}</artifactId>
Expand Down
Loading

0 comments on commit a9eae7e

Please sign in to comment.