Skip to content

Commit

Permalink
(#258) CSVDataFrameParser handles null values, Stacktrace printed for…
Browse files Browse the repository at this point in the history
… error in QueryEngine, and Macrobase -> MacroBase
  • Loading branch information
fabuzaid21 committed Mar 17, 2018
1 parent e560047 commit b56834c
Show file tree
Hide file tree
Showing 28 changed files with 149 additions and 148 deletions.
Expand Up @@ -7,12 +7,9 @@
import edu.stanford.futuredata.macrobase.analysis.summary.aplinear.APLOutlierSummarizer;
import edu.stanford.futuredata.macrobase.analysis.summary.BatchSummarizer;
import edu.stanford.futuredata.macrobase.analysis.summary.fpg.FPGrowthSummarizer;
import edu.stanford.futuredata.macrobase.analysis.summary.ratios.ExplanationMetric;
import edu.stanford.futuredata.macrobase.analysis.summary.ratios.GlobalRatioMetric;
import edu.stanford.futuredata.macrobase.analysis.summary.ratios.RiskRatioMetric;
import edu.stanford.futuredata.macrobase.datamodel.DataFrame;
import edu.stanford.futuredata.macrobase.datamodel.Schema;
import edu.stanford.futuredata.macrobase.util.MacrobaseException;
import edu.stanford.futuredata.macrobase.util.MacroBaseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -75,7 +72,7 @@ public BasicBatchPipeline (PipelineConfig conf) {
numThreads = conf.get("numThreads", Runtime.getRuntime().availableProcessors());
}

public Classifier getClassifier() throws MacrobaseException {
public Classifier getClassifier() throws MacroBaseException {
switch (classifierType.toLowerCase()) {
case "percentile": {
PercentileClassifier classifier = new PercentileClassifier(metric);
Expand All @@ -93,12 +90,12 @@ public Classifier getClassifier() throws MacrobaseException {
return classifier;
}
default : {
throw new MacrobaseException("Bad Classifier Type");
throw new MacroBaseException("Bad Classifier Type");
}
}
}

public BatchSummarizer getSummarizer(String outlierColumnName) throws MacrobaseException {
public BatchSummarizer getSummarizer(String outlierColumnName) throws MacroBaseException {
switch (summarizerType.toLowerCase()) {
case "fpgrowth": {
FPGrowthSummarizer summarizer = new FPGrowthSummarizer();
Expand All @@ -120,7 +117,7 @@ public BatchSummarizer getSummarizer(String outlierColumnName) throws MacrobaseE
return summarizer;
}
default: {
throw new MacrobaseException("Bad Summarizer Type");
throw new MacroBaseException("Bad Summarizer Type");
}
}
}
Expand Down
Expand Up @@ -12,7 +12,7 @@
import edu.stanford.futuredata.macrobase.datamodel.DataFrame;
import edu.stanford.futuredata.macrobase.datamodel.Schema;
import edu.stanford.futuredata.macrobase.ingest.CSVDataFrameWriter;
import edu.stanford.futuredata.macrobase.util.MacrobaseException;
import edu.stanford.futuredata.macrobase.util.MacroBaseException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
Expand Down Expand Up @@ -143,17 +143,17 @@ public APLExplanation results() throws Exception {
return explanation;
}

private Map<String, Schema.ColType> getColTypes() throws MacrobaseException {
private Map<String, Schema.ColType> getColTypes() throws MacroBaseException {
Map<String, Schema.ColType> colTypes = new HashMap<>();
colTypes.put(countColumn, Schema.ColType.DOUBLE);
switch (classifierType) {
case "meanshift":
case "arithmetic": {
colTypes.put(meanColumn
.orElseThrow(() -> new MacrobaseException("mean column not present in config")),
.orElseThrow(() -> new MacroBaseException("mean column not present in config")),
Schema.ColType.DOUBLE);
colTypes.put(stdColumn
.orElseThrow(() -> new MacrobaseException("std column not present in config")),
.orElseThrow(() -> new MacroBaseException("std column not present in config")),
Schema.ColType.DOUBLE);
return colTypes;
}
Expand All @@ -166,33 +166,33 @@ private Map<String, Schema.ColType> getColTypes() throws MacrobaseException {
case "predicate": {
if (isStrPredicate) {
colTypes.put(metric.orElseThrow(
() -> new MacrobaseException("metric column not present in config")),
() -> new MacroBaseException("metric column not present in config")),
Schema.ColType.STRING);
} else {
colTypes.put(metric.orElseThrow(
() -> new MacrobaseException("metric column not present in config")),
() -> new MacroBaseException("metric column not present in config")),
Schema.ColType.DOUBLE);
}
return colTypes;
}
case "raw": {
colTypes.put(meanColumn.orElseThrow(
() -> new MacrobaseException("mean column not present in config")),
() -> new MacroBaseException("mean column not present in config")),
Schema.ColType.DOUBLE);
}
default:
throw new MacrobaseException("Bad Classifier Name");
throw new MacroBaseException("Bad Classifier Name");
}
}

private CubeClassifier getClassifier() throws MacrobaseException {
private CubeClassifier getClassifier() throws MacroBaseException {
switch (classifierType) {
case "arithmetic": {
ArithmeticClassifier classifier =
new ArithmeticClassifier(countColumn, meanColumn.orElseThrow(
() -> new MacrobaseException("mean column not present in config")),
() -> new MacroBaseException("mean column not present in config")),
stdColumn.orElseThrow(
() -> new MacrobaseException("std column not present in config")));
() -> new MacroBaseException("std column not present in config")));
classifier.setPercentile(cutoff);
classifier.setIncludeHigh(includeHi);
classifier.setIncludeLow(includeLo);
Expand All @@ -210,12 +210,12 @@ private CubeClassifier getClassifier() throws MacrobaseException {
if (isStrPredicate) {
return new PredicateCubeClassifier(countColumn,
metric.orElseThrow(
() -> new MacrobaseException("metric column not present in config")),
() -> new MacroBaseException("metric column not present in config")),
predicateStr, strCutoff);
}
return new PredicateCubeClassifier(countColumn,
metric.orElseThrow(
() -> new MacrobaseException("metric column not present in config")),
() -> new MacroBaseException("metric column not present in config")),
predicateStr, cutoff);
}

Expand All @@ -224,11 +224,11 @@ private CubeClassifier getClassifier() throws MacrobaseException {
return new RawClassifier(
countColumn,
meanColumn.orElseThrow(
() -> new MacrobaseException("mean column not present in config"))
() -> new MacroBaseException("mean column not present in config"))
);
}
default:
throw new MacrobaseException("Bad Classifier Name");
throw new MacroBaseException("Bad Classifier Name");
}
}

Expand All @@ -238,9 +238,9 @@ private APLSummarizer getSummarizer(CubeClassifier classifier) throws Exception
APLMeanSummarizer summarizer = new APLMeanSummarizer();
summarizer.setCountColumn(countColumn);
summarizer.setMeanColumn(meanColumn.orElseThrow(
() -> new MacrobaseException("mean column not present in config")));
() -> new MacroBaseException("mean column not present in config")));
summarizer.setStdColumn(stdColumn.orElseThrow(
() -> new MacrobaseException("std column not present in config")));
() -> new MacroBaseException("std column not present in config")));
summarizer.setAttributes(attributes);
summarizer.setMinSupport(minSupport);
summarizer.setMinStdDev(minRatioMetric);
Expand Down
Expand Up @@ -5,7 +5,7 @@
import edu.stanford.futuredata.macrobase.datamodel.Schema;
import edu.stanford.futuredata.macrobase.ingest.CSVDataFrameParser;
import edu.stanford.futuredata.macrobase.ingest.RESTDataFrameLoader;
import edu.stanford.futuredata.macrobase.util.MacrobaseException;
import edu.stanford.futuredata.macrobase.util.MacroBaseException;

import java.util.Map;
import java.util.List;
Expand Down Expand Up @@ -51,13 +51,13 @@ public static DataFrame loadDataFrame(
DataFrame df = loader.load();
return df;
} else {
throw new MacrobaseException("Unsupported URI");
throw new MacroBaseException("Unsupported URI");
}
}

public static Pipeline createPipeline(
PipelineConfig conf
) throws MacrobaseException {
) throws MacroBaseException {
String pipelineName = conf.get("pipeline");
switch (pipelineName) {
case "BasicBatchPipeline": {
Expand All @@ -67,7 +67,7 @@ public static Pipeline createPipeline(
return new CubePipeline(conf);
}
default: {
throw new MacrobaseException("Bad Pipeline");
throw new MacroBaseException("Bad Pipeline");
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions docs/source/user-guide/parameters.md
@@ -1,11 +1,11 @@
# Configuration Parameters

This document describes the important parameters in Macrobase that
This document describes the important parameters in MacroBase that
affect its behavior.

## Workload-specific parameters

We first describe workload-specific parameters. Macrobase will throw an exception if
We first describe workload-specific parameters. MacroBase will throw an exception if
parameters that don't have a default value aren't specified.

<table class="table">
Expand Down Expand Up @@ -48,7 +48,7 @@ reciprocal of the value for each point (i.e. 1/value).
<td>(none)</td>
<td>
Query used to specify input data source. Can be used to restrict
the rows or columns Macrobase needs to look at. For example,
the rows or columns MacroBase needs to look at. For example,
<code>SELECT tabl1.col1, tabl1.col2, tabl2.col3 FROM tabl1, tabl2 WHERE tabl1.id == tabl2.id;</code>
</td>
</tr>
Expand Down
2 changes: 1 addition & 1 deletion lib/readme.md
@@ -1,4 +1,4 @@
## Macrobase core operator library
## MacroBase core operator library

Basic operators for explaining differences in
high-dimensional data using categorical attributes.
Expand Down
@@ -1,7 +1,7 @@
package edu.stanford.futuredata.macrobase.analysis;

import edu.stanford.futuredata.macrobase.datamodel.DataFrame;
import edu.stanford.futuredata.macrobase.util.MacrobaseException;
import edu.stanford.futuredata.macrobase.util.MacroBaseException;
import java.lang.reflect.InvocationTargetException;
import java.util.Arrays;
import java.util.HashMap;
Expand Down Expand Up @@ -49,12 +49,12 @@ protected MBFunction(String columnName) {
* Call this method to invoke the function and generate the output column that results from
* applying it on the DataFrame
*
* @throws MacrobaseException If the column name (specified in the constructor of the
* @throws MacroBaseException If the column name (specified in the constructor of the
* MBFunction) isn't present in the DataFrame, an exception is thrown.
*/
public final double[] apply(final DataFrame df) throws MacrobaseException {
public final double[] apply(final DataFrame df) throws MacroBaseException {
if (!df.hasColumn(columnName)) {
throw new MacrobaseException(columnName + " not present in DataFrame");
throw new MacroBaseException(columnName + " not present in DataFrame");
}
final double[] outputCol = new double[df.getNumRows()];
this.applyFunction(df.getDoubleColumnByName(columnName), outputCol);
Expand All @@ -68,11 +68,11 @@ public final double[] apply(final DataFrame df) throws MacrobaseException {
* would be "normalize" (or "NORMALIZE"---the funcName is case-agnostic), and the {@param arg}
* would be "battery_drain".
*
* @throws MacrobaseException If there's an error instantiating the MBFunction (usually due to
* @throws MacroBaseException If there's an error instantiating the MBFunction (usually due to
* incorrect arguments or improperly defined subclasses), an exception is thrown.
*/
public static MBFunction getFunction(String funcName, String arg)
throws MacrobaseException {
throws MacroBaseException {
Class<? extends MBFunction> clazz;
switch (funcName.toLowerCase()) {
case "normalize": {
Expand All @@ -84,13 +84,13 @@ public static MBFunction getFunction(String funcName, String arg)
break;
}
default: {
throw new MacrobaseException("Bad MBFunction Type: " + funcName);
throw new MacroBaseException("Bad MBFunction Type: " + funcName);
}
}
try {
return clazz.getConstructor(String.class).newInstance(arg);
} catch (NoSuchMethodException | InstantiationException | InvocationTargetException | IllegalAccessException e) {
throw new MacrobaseException(
throw new MacroBaseException(
"MBFunction Type " + funcName + " incompatible with args (" + arg + ")");
}
}
Expand Down
@@ -1,12 +1,6 @@
package edu.stanford.futuredata.macrobase.analysis.classify;

import com.google.common.base.Joiner;
import edu.stanford.futuredata.macrobase.datamodel.DataFrame;
import edu.stanford.futuredata.macrobase.operator.Transformer;
import edu.stanford.futuredata.macrobase.util.MacrobaseException;
import java.lang.reflect.InvocationTargetException;
import java.util.BitSet;
import java.util.List;

public abstract class Classifier implements Transformer {

Expand Down
Expand Up @@ -2,7 +2,7 @@

import edu.stanford.futuredata.macrobase.analysis.classify.stats.MBPredicate;
import edu.stanford.futuredata.macrobase.datamodel.DataFrame;
import edu.stanford.futuredata.macrobase.util.MacrobaseException;
import edu.stanford.futuredata.macrobase.util.MacroBaseException;
import java.util.function.DoublePredicate;
import java.util.function.Predicate;

Expand Down Expand Up @@ -36,7 +36,7 @@ public class PredicateClassifier extends Classifier {
*/
public PredicateClassifier(final String columnName, final String predicateStr,
final double sentinel)
throws MacrobaseException {
throws MacroBaseException {
super(columnName);
this.predicate = MBPredicate.getDoublePredicate(predicateStr, sentinel);
this.isStrPredicate = false;
Expand All @@ -50,7 +50,7 @@ public PredicateClassifier(final String columnName, final String predicateStr,
*/
public PredicateClassifier(final String columnName, final String predicateStr,
final String sentinel)
throws MacrobaseException {
throws MacroBaseException {
super(columnName);
this.strPredicate = MBPredicate.getStrPredicate(predicateStr, sentinel);
this.isStrPredicate = true;
Expand Down
Expand Up @@ -2,7 +2,7 @@

import edu.stanford.futuredata.macrobase.analysis.classify.stats.MBPredicate;
import edu.stanford.futuredata.macrobase.datamodel.DataFrame;
import edu.stanford.futuredata.macrobase.util.MacrobaseException;
import edu.stanford.futuredata.macrobase.util.MacroBaseException;

import java.util.function.DoublePredicate;
import java.util.function.Predicate;
Expand All @@ -26,14 +26,14 @@ public class PredicateCubeClassifier extends CubeClassifier {
* @param metricColumnName Column on which to classify outliers
* @param predicateStr Predicate used for classification: "==", "!=", "<", ">", "<=", or ">="
* @param sentinel Sentinel value used when evaluating the predicate to determine outlier
* @throws MacrobaseException
* @throws MacroBaseException
*/
public PredicateCubeClassifier(
final String countColumnName,
final String metricColumnName,
final String predicateStr,
final double sentinel
) throws MacrobaseException {
) throws MacroBaseException {
super(countColumnName);
this.metricColumnName = metricColumnName;
this.predicate = MBPredicate.getDoublePredicate(predicateStr, sentinel);
Expand All @@ -45,14 +45,14 @@ public PredicateCubeClassifier(
* @param metricColumnName Column on which to classify outliers
* @param predicateStr Predicate used for classification: "==", "!=", "<", ">", "<=", or ">="
* @param sentinel Sentinel value used when evaluating the predicate to determine outlier
* @throws MacrobaseException
* @throws MacroBaseException
*/
public PredicateCubeClassifier(
final String countColumnName,
final String metricColumnName,
final String predicateStr,
final String sentinel
) throws MacrobaseException {
) throws MacroBaseException {
super(countColumnName);
this.metricColumnName = metricColumnName;
this.strPredicate = MBPredicate.getStrPredicate(predicateStr, sentinel);
Expand Down
@@ -1,6 +1,6 @@
package edu.stanford.futuredata.macrobase.analysis.classify.stats;

import edu.stanford.futuredata.macrobase.util.MacrobaseInternalError;
import edu.stanford.futuredata.macrobase.util.MacroBaseInternalError;

/**
* Performs linear interpolation in a lazy manner: interpolation does not actually
Expand All @@ -24,7 +24,7 @@ public LinearInterpolator(double[] x, double[] y) throws IllegalArgumentExceptio
this.y = y;
}

public double evaluate(double value) throws MacrobaseInternalError {
public double evaluate(double value) throws MacroBaseInternalError {
if ((value > x[x.length - 1]) || (value < x[0])) {
return Double.NaN;
}
Expand All @@ -43,6 +43,6 @@ public double evaluate(double value) throws MacrobaseInternalError {
return slope * value + intercept;
}

throw new MacrobaseInternalError("Linear interpolator implemented incorrectly");
throw new MacroBaseInternalError("Linear interpolator implemented incorrectly");
}
}

0 comments on commit b56834c

Please sign in to comment.