Skip to content

Commit

Permalink
Merge branch 'master' into movers
Browse files Browse the repository at this point in the history
  • Loading branch information
sahaana committed Nov 21, 2017
2 parents 7aa05ef + 7c0ba63 commit dfe0a94
Show file tree
Hide file tree
Showing 43 changed files with 715 additions and 177 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
.project
.settings/*
*.log
*.png

# Mobile Tools for Java (J2ME)
.mtj.tmp/
Expand Down
2 changes: 1 addition & 1 deletion bin/cli.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
BIN=`dirname "$0"`
BASE=$BIN/../core
java -Xmx4g -cp "$BASE/config:$BASE/target/classes:$BASE/target/*" \
java -Xmx4g -cp "$BASE/target/classes:$BASE/target/*" \
edu.stanford.futuredata.macrobase.cli.CliRunner "$@"
15 changes: 0 additions & 15 deletions core/config/logback.xml

This file was deleted.

2 changes: 1 addition & 1 deletion core/demo/cli_cube.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ classifier: "arithmetic"
countColumn: "count"
meanColumn: "mean"
stdColumn: "std"
percentile: 1.0
cutoff: 1.0
includeLo: true
includeHi: false

Expand Down
2 changes: 1 addition & 1 deletion core/demo/cli_quantile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ quantileColumns:
"p50" : 0.5
"p90" : 0.9
"max" : 1.0
percentile: 1.0
cutoff: 1.0
includeLo: true
includeHi: false

Expand Down
24 changes: 24 additions & 0 deletions core/demo/cli_quantile_rest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"pipeline": "CubePipeline",

"inputURI": "https://raw.githubusercontent.com/stanford-futuredata/macrobase/master/core/demo/sample_cubed_quantiles.csv",
"restHeader": {},
"usePost": false,
"classifier": "quantile",
"countColumn": "count",
"meanColumn": "mean",
"quantileColumns": {
"min": 0.0,
"p10": 0.1,
"p50": 0.5,
"p90": 0.9,
"max": 1.0
},
"cutoff": 1.0,
"includeLo": true,
"includeHi": false,

"attributes": ["location", "version"],
"minSupport": 0.2,
"minRatioMetric": 10.0
}
2 changes: 1 addition & 1 deletion core/demo/demo_query.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@

"summarizer": "apriori",
"attributes": ["location", "version"],
"minRiskRatio": 10.0,
"minRatioMetric": 10.0,
"minSupport": 0.2
}
32 changes: 21 additions & 11 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,21 @@
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.2.3</version>
</dependency>
<dependency>
<groupId>edu.stanford.futuredata</groupId>
<artifactId>macrobase-lib</artifactId>
<version>0.2.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.8.0-beta0</version>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<dependency>
<groupId>com.sparkjava</groupId>
<artifactId>spark-core</artifactId>
Expand All @@ -40,6 +45,11 @@
<artifactId>jackson-databind</artifactId>
<version>2.8.9</version>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>3.8.1</version>
</dependency>
</dependencies>

<build>
Expand All @@ -60,12 +70,12 @@
<configuration>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
package edu.stanford.futuredata.macrobase.cli;

import edu.stanford.futuredata.macrobase.analysis.summary.Explanation;
import edu.stanford.futuredata.macrobase.pipeline.BasicBatchPipeline;
import edu.stanford.futuredata.macrobase.pipeline.CubePipeline;
import edu.stanford.futuredata.macrobase.pipeline.PipelineConfig;
import edu.stanford.futuredata.macrobase.pipeline.Pipeline;
import edu.stanford.futuredata.macrobase.util.MacrobaseException;
import edu.stanford.futuredata.macrobase.pipeline.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -23,24 +19,9 @@ public class CliRunner {
public static void main(String[] args) throws Exception {
String configFile = args[0];
PipelineConfig conf = PipelineConfig.fromYamlFile(configFile);
Pipeline p = loadPipeline(conf);
Pipeline p = PipelineUtils.createPipeline(conf);
Explanation e = p.results();
log.info("Computed Results");
System.out.println(e.prettyPrint());
}

public static Pipeline loadPipeline(PipelineConfig conf) throws MacrobaseException{
String pipelineName = conf.get("pipeline");
switch (pipelineName) {
case "BasicBatchPipeline": {
return new BasicBatchPipeline(conf);
}
case "CubePipeline": {
return new CubePipeline(conf);
}
default: {
throw new MacrobaseException("Bad Pipeline");
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package edu.stanford.futuredata.macrobase.ingest;

import edu.stanford.futuredata.macrobase.datamodel.DataFrame;
import edu.stanford.futuredata.macrobase.datamodel.Schema;
import okhttp3.*;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;

import java.io.IOException;
import java.net.URL;
import java.util.Map;
import java.util.concurrent.TimeUnit;

/**
 * Loads a {@link DataFrame} by fetching CSV text from a REST endpoint.
 *
 * <p>Issues either a POST (default, with an optional JSON body) or a GET
 * (with optional query parameters) against the configured URL, then parses
 * the response body as headered CSV via {@link CSVDataFrameParser}.
 */
public class RESTDataFrameLoader implements DataFrameLoader {
    private final String baseURL;
    private final Map<String, String> headerParams;

    // POST is the default request method; callers flip to GET via setUsePost(false).
    private boolean usePost = true;
    private String jsonBody;
    private Map<String, String> getParams;
    private Map<String, Schema.ColType> types;

    private final OkHttpClient client;

    /**
     * @param url          endpoint returning CSV text
     * @param headerParams HTTP headers added to every request; may be null for none
     */
    public RESTDataFrameLoader(
            String url,
            Map<String, String> headerParams
    ) {
        this.baseURL = url;
        this.headerParams = headerParams;

        OkHttpClient.Builder b = new OkHttpClient.Builder();
        // SECURITY NOTE(review): hostname verification is disabled, so any valid
        // certificate is accepted for any host over HTTPS (enables MITM). Kept
        // for backward compatibility -- confirm this is intentional.
        b.hostnameVerifier((hostname, session) -> true);
        b.connectTimeout(20, TimeUnit.SECONDS);
        this.client = b.build();
    }

    /** Selects POST (true, default) or GET (false) for {@link #load()}. */
    public void setUsePost(boolean flag) {
        this.usePost = flag;
    }

    /** JSON payload sent as the POST body; ignored for GET requests. */
    public void setJsonBody(String jsonBody) {
        this.jsonBody = jsonBody;
    }

    /** Query parameters appended to the URL for GET requests; ignored for POST. */
    public void setGetParams(Map<String, String> getParams) {
        this.getParams = getParams;
    }

    /** Copies the configured headers (if any) onto the request being built. */
    private void addHeaders(Request.Builder requestBuilder) {
        if (headerParams != null) {
            for (Map.Entry<String, String> header : headerParams.entrySet()) {
                requestBuilder.addHeader(header.getKey(), header.getValue());
            }
        }
    }

    /**
     * Executes the request and returns the response body as text.
     *
     * <p>Closes the {@link Response} even on error paths, and fails fast on
     * non-2xx statuses instead of handing an error page to the CSV parser.
     *
     * @throws IOException on network failure, non-successful status, or empty body
     */
    private String execute(Request request) throws IOException {
        try (Response response = this.client.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                throw new IOException(
                        "Unexpected HTTP status " + response.code() + " from " + baseURL);
            }
            ResponseBody body = response.body();
            if (body == null) {
                throw new IOException("Empty response body from " + baseURL);
            }
            return body.string();
        }
    }

    private String postRequest() throws IOException {
        URL url = new URL(baseURL);
        MediaType json = MediaType.parse("application/json; charset=utf-8");
        RequestBody body = RequestBody.create(json, jsonBody);
        Request.Builder requestBuilder = new Request.Builder()
                .url(url)
                .post(body);
        addHeaders(requestBuilder);
        return execute(requestBuilder.build());
    }

    private String getRequest() throws IOException {
        URL url = new URL(baseURL);
        HttpUrl.Builder httpBuilder = HttpUrl.get(url).newBuilder();
        if (getParams != null) {
            for (Map.Entry<String, String> param : getParams.entrySet()) {
                httpBuilder.addQueryParameter(param.getKey(), param.getValue());
            }
        }
        HttpUrl fullURL = httpBuilder.build();

        Request.Builder requestBuilder = new Request.Builder()
                .url(fullURL)
                .get();
        addHeaders(requestBuilder);
        return execute(requestBuilder.build());
    }

    @Override
    public DataFrameLoader setColumnTypes(Map<String, Schema.ColType> types) {
        this.types = types;
        return this;
    }

    /**
     * Fetches the endpoint and parses the response as CSV with a header row.
     *
     * @return the parsed DataFrame
     * @throws Exception on HTTP failure or CSV parse error
     */
    @Override
    public DataFrame load() throws Exception {
        String response;
        if (usePost) {
            response = postRequest();
        } else {
            response = getRequest();
        }

        CSVParser csvParser = CSVParser.parse(
                response,
                CSVFormat.DEFAULT.withHeader()
        );
        CSVDataFrameParser dfParser = new CSVDataFrameParser(csvParser);
        dfParser.setColumnTypes(types);
        return dfParser.load();
    }
}

0 comments on commit dfe0a94

Please sign in to comment.