Skip to content

Commit

Permalink
frontier report : Generates a full frontier report from H3 using an R…
Browse files Browse the repository at this point in the history
…EST call
  • Loading branch information
bnfklm committed Sep 15, 2016
1 parent 37e5a23 commit 01566a2
Showing 1 changed file with 23 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,14 @@
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.lang.StringUtils;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.StringEntity;
import org.netarchivesuite.heritrix3wrapper.EngineResult;
import org.netarchivesuite.heritrix3wrapper.Heritrix3Wrapper;
import org.netarchivesuite.heritrix3wrapper.Heritrix3Wrapper.CrawlControllerState;
Expand Down Expand Up @@ -71,6 +74,8 @@ public class HeritrixController extends AbstractRestHeritrixController {
// NOTE(review): presumably the number of attempts made to reach the Heritrix3 engine — confirm against the retry code.
private int heritrix3EngineRetries;
// NOTE(review): presumably the pause between those attempts, in milliseconds (per the name) — confirm.
private int heritrix3EngineIntervalBetweenRetriesInMillis;

/** Base URL of the Heritrix3 engine REST interface; assigned in initialize() as "https://<host>:<guiPort>/engine/". */
private String baseUrl;

/**
* Create a BnfHeritrixController object.
*
Expand Down Expand Up @@ -119,6 +124,8 @@ public void initialize() {
} else {
throw new IOFailure("Unexpected error: Heritrix3 wrapper returned null engine result.");
}

baseUrl = "https://" + getHostName() + ":" + Integer.toString(getGuiPort()) + "/engine/";

// POST: Heritrix3 is up and running and responds nicely
log.info("Heritrix3 REST interface up and running");
Expand Down Expand Up @@ -499,23 +506,26 @@ private void fetchCrawlServiceJobAttributes(CrawlProgressMessage cpm, JobResult
}

/**
 * Generates a full frontier report from H3 using a REST call (Groovy script).
 * <p>
 * Posts the script held in {@code Constants.FRONTIER_REPORT_GROOVY_SCRIPT} to the
 * {@code script} resource of the current job and hands the response to
 * {@code FullFrontierReport.parseContentsAsXML}.
 *
 * @return a Full frontier report.
 * @throws IOFailure if the request body cannot be encoded, or if the Heritrix3 wrapper
 *         returns no result for the request.
 */
public FullFrontierReport getFullFrontierReport() {
    // Construct the script request to send.
    HttpPost postRequest = new HttpPost(baseUrl + "job/" + jobName + "/script");

    // NOTE(review): the engine is declared as "beanshell" although the constant is named
    // ..._GROOVY_SCRIPT, and the script is appended without URL-encoding even though the
    // body is sent as application/x-www-form-urlencoded. Left unchanged because the
    // constant's content is not visible here - confirm both against Constants.
    final StringEntity postEntity;
    try {
        postEntity = new StringEntity("engine=beanshell&script="
                + dk.netarkivet.harvester.heritrix3.Constants.FRONTIER_REPORT_GROOVY_SCRIPT);
    } catch (UnsupportedEncodingException e) {
        // Fail fast with the cause preserved instead of printing the stack trace and
        // then dereferencing a null entity.
        throw new IOFailure("Unable to encode the frontier report script request for job '"
                + jobName + "'", e);
    }
    postEntity.setContentType("application/x-www-form-urlencoded");
    postRequest.addHeader("Accept", "application/xml");
    postRequest.setEntity(postEntity);

    JobResult result = h3wrapper.jobResult(postRequest);
    if (result == null) {
        // Same guard style as the engine-result check in initialize().
        throw new IOFailure("Unexpected error: Heritrix3 wrapper returned null job result for job '"
                + jobName + "'.");
    }
    return FullFrontierReport.parseContentsAsXML(
            jobName, result.response, dk.netarkivet.harvester.heritrix3.Constants.XML_RAWOUT_TAG);
}

@Override
Expand Down

0 comments on commit 01566a2

Please sign in to comment.