This repository has been archived by the owner on Oct 8, 2019. It is now read-only.

Merge branch 'master' into 'feature/systemtest'
resolved conflict in pom.xml
amaya382 committed Sep 7, 2016
2 parents bca5404 + df4f4dd commit 242dfaf
Showing 153 changed files with 15,272 additions and 89 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -15,3 +15,5 @@ scalastyle-output.xml
scalastyle.txt
derby.log
spark/bin/zinc-*
*.dylib
*.so
9 changes: 8 additions & 1 deletion .travis.yml
@@ -28,10 +28,17 @@ branches:
  only:
    - master
    - develop
    - dev/spark

before_install:
  - mvn validate -Pxgboost

notifications:
  email: false

script:
  - mvn -q test -Pspark-2.0
  # test the spark-1.6 module only in this second run
  - mvn -q test -Pspark-1.6 -Dtest=org.apache.spark.*

after_success:
  - mvn clean cobertura:cobertura coveralls:report
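
For reference, the CI sequence above can be reproduced locally with stock Maven; a minimal sketch, assuming a JDK 7+ and Maven are on `PATH` (the commands are taken verbatim from the file):

```bash
# Mirror the Travis steps locally
mvn validate -Pxgboost                             # before_install: validate the xgboost profile
mvn -q test -Pspark-2.0                            # first run: tests against the spark-2.0 profile
mvn -q test -Pspark-1.6 -Dtest=org.apache.spark.*  # second run: spark-1.6 module only
```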
4 changes: 2 additions & 2 deletions README.md
@@ -112,7 +112,7 @@ _My recommendation is AROW regression, AdaDelta, and Factorization Machine w
System requirements
--------------------

* Hive 0.11 or later
* Hive 0.12 or later

* Java 7 or later

@@ -129,7 +129,7 @@ Copyright
---------

```
Copyright (C) 2015 Makoto YUI
Copyright (C) 2015-2016 Makoto YUI
Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST)
```

64 changes: 64 additions & 0 deletions bin/build_xgboost.sh
@@ -0,0 +1,64 @@
#!/bin/bash

# Hivemall: Hive scalable Machine Learning Library
#
# Copyright (C) 2015 Makoto YUI
# Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -eu
set -o pipefail

# Target commit hash value
XGBOOST_HASHVAL='85443403310e90bd8a90a1f817841520838b4ac7'

# Move to the top directory
if [ "${HIVEMALL_HOME:-}" == "" ]; then  # ${VAR:-} avoids an unbound-variable error under `set -u`
  if [ -e ../bin/${0##*/} ]; then
    HIVEMALL_HOME=".."
  elif [ -e ./bin/${0##*/} ]; then
    HIVEMALL_HOME="."
  else
    echo "env HIVEMALL_HOME not defined"
    exit 1
  fi
fi

cd $HIVEMALL_HOME

# Final output dir for a custom-compiled xgboost binary
HIVEMALL_LIB_DIR="$HIVEMALL_HOME/xgboost/src/main/resources/lib/"
rm -rf $HIVEMALL_LIB_DIR >> /dev/null
mkdir -p $HIVEMALL_LIB_DIR

# Move to an output directory
XGBOOST_OUT="$HIVEMALL_HOME/target/xgboost-$XGBOOST_HASHVAL"
rm -rf $XGBOOST_OUT >> /dev/null
mkdir -p $XGBOOST_OUT
cd $XGBOOST_OUT

# Fetch xgboost sources
git clone --progress https://github.com/maropu/xgboost.git
cd xgboost
git checkout $XGBOOST_HASHVAL

# Resolve dependent sources
git submodule init
git submodule update

# Build the JNI binding and copy the resulting binary into the output dir
cd jvm-packages
ENABLE_STATIC_LINKS=1 ./create_jni.sh
cp ./lib/libxgboost4j.* "$HIVEMALL_LIB_DIR"
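
A minimal usage sketch for the script above; `/path/to/hivemall` is a placeholder, and `HIVEMALL_HOME` can be omitted when invoking the script from the repository root or from `bin/`, in which case the script infers it:

```bash
# Hypothetical invocation; /path/to/hivemall is a placeholder
HIVEMALL_HOME=/path/to/hivemall ./bin/build_xgboost.sh

# On success, the custom-compiled binding lands in the resources dir:
ls xgboost/src/main/resources/lib/libxgboost4j.*
```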

4 changes: 2 additions & 2 deletions core/.classpath
@@ -22,12 +22,12 @@
      <attribute name="maven.pomderived" value="true"/>
    </attributes>
  </classpathentry>
  <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6">
  <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
    <attributes>
      <attribute name="maven.pomderived" value="true"/>
    </attributes>
  </classpathentry>
  <classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
  <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
    <attributes>
      <attribute name="maven.pomderived" value="true"/>
    </attributes>
14 changes: 13 additions & 1 deletion core/pom.xml
@@ -24,7 +24,7 @@
    <dependency>
      <groupId>org.apache.hive</groupId>
      <artifactId>hive-exec</artifactId>
      <version>0.11.0</version>
      <version>0.12.0</version>
      <scope>provided</scope>
      <exclusions>
        <exclusion>
@@ -89,13 +89,25 @@
      <artifactId>smile-core</artifactId>
      <version>1.0.3</version>
      <scope>compile</scope>
      <exclusions>
        <exclusion>
          <artifactId>smile-graph</artifactId>
          <groupId>com.github.haifengl</groupId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.tukaani</groupId>
      <artifactId>xz</artifactId>
      <version>1.5</version>
      <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-math3</artifactId>
      <version>3.6.1</version>
      <scope>compile</scope>
    </dependency>

    <!-- test scope -->
    <dependency>
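To check that the newly added `compile`-scoped dependencies resolve as expected, the standard `dependency:tree` goal can be pointed at the core module; `-pl core` assumes the module's relative path in this layout:

```bash
# Inspect how commons-math3 and xz enter the core module's dependency tree
mvn -pl core dependency:tree -Dincludes=org.apache.commons:commons-math3
mvn -pl core dependency:tree -Dincludes=org.tukaani:xz
```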
107 changes: 107 additions & 0 deletions core/src/main/java/hivemall/anomaly/ChangeFinder1D.java
@@ -0,0 +1,107 @@
/*
 * Hivemall: Hive scalable Machine Learning Library
 *
 * Copyright (C) 2015 Makoto YUI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package hivemall.anomaly;

import hivemall.anomaly.ChangeFinderUDF.ChangeFinder;
import hivemall.anomaly.ChangeFinderUDF.LossFunction;
import hivemall.anomaly.ChangeFinderUDF.Parameters;
import hivemall.utils.collections.DoubleRingBuffer;

import javax.annotation.Nonnull;

import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

final class ChangeFinder1D implements ChangeFinder {

    @Nonnull
    private final PrimitiveObjectInspector oi;
    @Nonnull
    private final LossFunction lossFunc1;
    @Nonnull
    private final LossFunction lossFunc2;

    @Nonnull
    private final SDAR1D sdar1, sdar2;
    @Nonnull
    private final DoubleRingBuffer xRing, yRing;
    @Nonnull
    private final double[] xSeries, ySeries;
    @Nonnull
    private final DoubleRingBuffer outlierScores, changepointScores;

    ChangeFinder1D(@Nonnull Parameters params, @Nonnull PrimitiveObjectInspector oi) {
        this.oi = oi;
        this.lossFunc1 = params.lossFunc1;
        this.lossFunc2 = params.lossFunc2;
        int k = params.k;
        this.sdar1 = new SDAR1D(params.r1, k);
        this.sdar2 = new SDAR1D(params.r2, k);
        this.xRing = new DoubleRingBuffer(k + 1);
        this.yRing = new DoubleRingBuffer(k + 1);
        this.xSeries = new double[k + 1];
        this.ySeries = new double[k + 1];
        this.outlierScores = new DoubleRingBuffer(params.T1);
        this.changepointScores = new DoubleRingBuffer(params.T2);
    }

    @Override
    public void update(@Nonnull final Object arg, @Nonnull final double[] outScores)
            throws HiveException {
        double x = PrimitiveObjectInspectorUtils.getDouble(arg, oi);

        // [Stage#1] Outlier Detection
        xRing.add(x).toArray(xSeries, false /* LIFO */);
        int k1 = xRing.size() - 1;
        double x_hat = sdar1.update(xSeries, k1);

        double scoreX = (k1 == 0.d) ? 0.d : loss(sdar1, x, x_hat, lossFunc1);
        // smoothing
        double y = ChangeFinderUDF.smoothing(outlierScores.add(scoreX));

        // [Stage#2] Change-point Detection
        yRing.add(y).toArray(ySeries, false /* LIFO */);
        int k2 = yRing.size() - 1;
        double y_hat = sdar2.update(ySeries, k2);

        // <LogLoss>
        double lossY = (k2 == 0.d) ? 0.d : loss(sdar2, y, y_hat, lossFunc2);
        double scoreY = ChangeFinderUDF.smoothing(changepointScores.add(lossY));

        outScores[0] = scoreX;
        outScores[1] = scoreY;
    }

    private static double loss(@Nonnull final SDAR1D sdar, final double actual,
            final double predicted, @Nonnull final LossFunction lossFunc) {
        final double loss;
        switch (lossFunc) {
            case hellinger:
                double h2d = sdar.hellingerDistance();
                loss = h2d * 100.d;
                break;
            case logloss:
                loss = sdar.logLoss(actual, predicted);
                break;
            default:
                throw new IllegalStateException("Unexpected loss function: " + lossFunc);
        }
        return loss;
    }
}
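
To exercise only the new anomaly-detection code, the `-Dtest` filter style already used in `.travis.yml` above could be narrowed to this package; a sketch only, since the accompanying test classes are not shown in this diff:

```bash
# Hypothetical: run tests under hivemall.anomaly only (test classes assumed to exist)
mvn -q test -pl core -Dtest='hivemall.anomaly.*'
```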
