This repository has been archived by the owner on Oct 8, 2019. It is now read-only.
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into 'feature/systemtest'
resolved conflict pom.xml
- Loading branch information
Showing
153 changed files
with
15,270 additions
and
87 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,3 +15,5 @@ scalastyle-output.xml | |
scalastyle.txt | ||
derby.log | ||
spark/bin/zinc-* | ||
*.dylib | ||
*.so |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#!/bin/bash

# Hivemall: Hive scalable Machine Learning Library
#
# Copyright (C) 2015 Makoto YUI
# Copyright (C) 2013-2015 National Institute of Advanced Industrial Science and Technology (AIST)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Builds a custom xgboost JNI binary at a pinned commit and installs it
# into the Hivemall xgboost module's resource directory.

set -eu
set -o pipefail

# Target commit hash value
XGBOOST_HASHVAL='85443403310e90bd8a90a1f817841520838b4ac7'

# Move to a top directory.
# NOTE: ${HIVEMALL_HOME:-} avoids an "unbound variable" abort under `set -u`
# when HIVEMALL_HOME is not exported in the environment.
if [ "${HIVEMALL_HOME:-}" == "" ]; then
  if [ -e ../bin/${0##*/} ]; then
    HIVEMALL_HOME=".."
  elif [ -e ./bin/${0##*/} ]; then
    HIVEMALL_HOME="."
  else
    echo "env HIVEMALL_HOME not defined" 1>&2
    exit 1
  fi
fi

# Canonicalize to an absolute path; the relative values ".." or "." above
# would otherwise break every "$HIVEMALL_HOME/..." reference after this cd.
cd "$HIVEMALL_HOME"
HIVEMALL_HOME=$(pwd)

# Final output dir for a custom-compiled xgboost binary
HIVEMALL_LIB_DIR="$HIVEMALL_HOME/xgboost/src/main/resources/lib/"
rm -rf "$HIVEMALL_LIB_DIR"
mkdir -p "$HIVEMALL_LIB_DIR"

# Move to an output directory
XGBOOST_OUT="$HIVEMALL_HOME/target/xgboost-$XGBOOST_HASHVAL"
rm -rf "$XGBOOST_OUT"
mkdir -p "$XGBOOST_OUT"
cd "$XGBOOST_OUT"

# Fetch xgboost sources
git clone --progress https://github.com/maropu/xgboost.git
cd xgboost
git checkout "$XGBOOST_HASHVAL"

# Resolve dependent sources
git submodule init
git submodule update

# Build a statically-linked JNI library and copy it to the output
cd jvm-packages
ENABLE_STATIC_LINKS=1 ./create_jni.sh
cp ./lib/libxgboost4j.* "$HIVEMALL_LIB_DIR"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
107 changes: 107 additions & 0 deletions — core/src/main/java/hivemall/anomaly/ChangeFinder1D.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/* | ||
* Hivemall: Hive scalable Machine Learning Library | ||
* | ||
* Copyright (C) 2015 Makoto YUI | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package hivemall.anomaly; | ||
|
||
import hivemall.anomaly.ChangeFinderUDF.ChangeFinder; | ||
import hivemall.anomaly.ChangeFinderUDF.LossFunction; | ||
import hivemall.anomaly.ChangeFinderUDF.Parameters; | ||
import hivemall.utils.collections.DoubleRingBuffer; | ||
|
||
import javax.annotation.Nonnull; | ||
|
||
import org.apache.hadoop.hive.ql.metadata.HiveException; | ||
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; | ||
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; | ||
|
||
final class ChangeFinder1D implements ChangeFinder { | ||
|
||
@Nonnull | ||
private final PrimitiveObjectInspector oi; | ||
@Nonnull | ||
private final LossFunction lossFunc1; | ||
@Nonnull | ||
private final LossFunction lossFunc2; | ||
|
||
@Nonnull | ||
private final SDAR1D sdar1, sdar2; | ||
@Nonnull | ||
private final DoubleRingBuffer xRing, yRing; | ||
@Nonnull | ||
private final double[] xSeries, ySeries; | ||
@Nonnull | ||
private final DoubleRingBuffer outlierScores, changepointScores; | ||
|
||
ChangeFinder1D(@Nonnull Parameters params, @Nonnull PrimitiveObjectInspector oi) { | ||
this.oi = oi; | ||
this.lossFunc1 = params.lossFunc1; | ||
this.lossFunc2 = params.lossFunc2; | ||
int k = params.k; | ||
this.sdar1 = new SDAR1D(params.r1, k); | ||
this.sdar2 = new SDAR1D(params.r2, k); | ||
this.xRing = new DoubleRingBuffer(k + 1); | ||
this.yRing = new DoubleRingBuffer(k + 1); | ||
this.xSeries = new double[k + 1]; | ||
this.ySeries = new double[k + 1]; | ||
this.outlierScores = new DoubleRingBuffer(params.T1); | ||
this.changepointScores = new DoubleRingBuffer(params.T2); | ||
} | ||
|
||
@Override | ||
public void update(@Nonnull final Object arg, @Nonnull final double[] outScores) | ||
throws HiveException { | ||
double x = PrimitiveObjectInspectorUtils.getDouble(arg, oi); | ||
|
||
// [Stage#1] Outlier Detection | ||
xRing.add(x).toArray(xSeries, false /* LIFO */); | ||
int k1 = xRing.size() - 1; | ||
double x_hat = sdar1.update(xSeries, k1); | ||
|
||
double scoreX = (k1 == 0.d) ? 0.d : loss(sdar1, x, x_hat, lossFunc1); | ||
// smoothing | ||
double y = ChangeFinderUDF.smoothing(outlierScores.add(scoreX)); | ||
|
||
// [Stage#2] Change-point Detection | ||
yRing.add(y).toArray(ySeries, false /* LIFO */); | ||
int k2 = yRing.size() - 1; | ||
double y_hat = sdar2.update(ySeries, k2); | ||
|
||
// <LogLoss> | ||
double lossY = (k2 == 0.d) ? 0.d : loss(sdar2, y, y_hat, lossFunc2); | ||
double scoreY = ChangeFinderUDF.smoothing(changepointScores.add(lossY)); | ||
|
||
outScores[0] = scoreX; | ||
outScores[1] = scoreY; | ||
} | ||
|
||
private static double loss(@Nonnull final SDAR1D sdar, @Nonnull final double actual, | ||
@Nonnull final double predicted, @Nonnull final LossFunction lossFunc) { | ||
final double loss; | ||
switch (lossFunc) { | ||
case hellinger: | ||
double h2d = sdar.hellingerDistance(); | ||
loss = h2d * 100.d; | ||
break; | ||
case logloss: | ||
loss = sdar.logLoss(actual, predicted); | ||
break; | ||
default: | ||
throw new IllegalStateException("Unexpected loss function: " + lossFunc); | ||
} | ||
return loss; | ||
} | ||
} |
Oops, something went wrong.