Skip to content

Commit

Permalink
NAS-2520: Allow for the caching of more than one search result at a …
Browse files Browse the repository at this point in the history
…time and cleanup of these upon job end.
  • Loading branch information
nclarkekb committed Oct 19, 2016
1 parent 645c801 commit 86c6ccc
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.IOUtils;
import org.netarchivesuite.heritrix3wrapper.AnypathResult;
Expand Down Expand Up @@ -49,8 +53,8 @@ protected Heritrix3JobMonitor() {
/**
 * Creates a job monitor for the given job, assigns its log and index files and
 * initializes it.
 *
 * <p>The files are named {@code crawllog-<jobId>.log} and
 * {@code crawllog-<jobId>.idx}, relative to the current working directory.
 *
 * @param jobId id of the job to monitor; also used to build the file names
 * @return a new monitor on which {@code init()} has already been called
 * @throws IOException propagated from initialization (presumably thrown by
 *         {@code init()} -- confirm against its declaration)
 */
public static Heritrix3JobMonitor getInstance(Long jobId) throws IOException {
    Heritrix3JobMonitor jobmonitor = new Heritrix3JobMonitor();
    jobmonitor.jobId = jobId;
    // Only the correctly spelled "crawllog-" names are assigned; the earlier
    // "crwawllog-" assignments were dead stores overwritten immediately.
    jobmonitor.logFile = new File("crawllog-" + jobId + ".log");
    jobmonitor.idxFile = new File("crawllog-" + jobId + ".idx");
    jobmonitor.init();
    return jobmonitor;
}
Expand Down Expand Up @@ -153,11 +157,20 @@ public synchronized boolean isReady() {
return (bActive && bInitialized);
}

// Cache of search results keyed by the query string they were created for.
protected Map<String, SearchResult> qSearchResultMap = new HashMap<String, SearchResult>();

// Running counter handed to each newly created SearchResult; incremented once per new result.
protected int searchResultNr = 0;

/**
 * Returns the search result for the query {@code q}, creating and caching it
 * the first time the query is seen.
 *
 * <p>Subsequent calls with an equal query string return the same cached
 * instance from {@code qSearchResultMap}.
 *
 * @param q the query string; used both as the cache key and as the argument
 *          passed to the {@code SearchResult} constructor
 * @return the cached or newly created search result for {@code q}
 * @throws IOException if creating a new {@code SearchResult} fails
 */
public synchronized SearchResult getSearchResult(String q) throws IOException {
    SearchResult searchResult = qSearchResultMap.get(q);
    if (searchResult == null) {
        // Every new result receives the next value of the counter, so no two
        // results created by this monitor share the same number.
        searchResult = new SearchResult(this, q, searchResultNr++);
        qSearchResultMap.put(q, searchResult);
    }
    return searchResult;
}

public synchronized void cleanup() {
public synchronized void cleanup(List<File> oldFilesList) {
bActive = false;
bInitialized = false;
hostUrl = null;
Expand All @@ -167,6 +180,16 @@ public synchronized void cleanup() {
crawlLogFilePath = null;
IOUtils.closeQuietly(logRaf);
IOUtils.closeQuietly(idxRaf);
oldFilesList.add(logFile);
oldFilesList.add(idxFile);
Iterator<SearchResult> srIter = qSearchResultMap.values().iterator();
SearchResult sr;
while (srIter.hasNext()) {
sr = srIter.next();
oldFilesList.add(sr.srIdxFile);
sr.cleanup();
}
qSearchResultMap.clear();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public void run() {
File[] oldFiles = tmpFolder.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
if (name.startsWith("crwawllog-")) {
if (name.startsWith("crawllog-")) {
if (name.endsWith(".log") || name.endsWith(".idx")) {
return true;
}
Expand Down Expand Up @@ -104,9 +104,7 @@ public boolean accept(File dir, String name) {
jobmonitorIter = filterJobMonitorMap.values().iterator();
while (jobmonitorIter.hasNext()) {
jobmonitor = jobmonitorIter.next();
oldFilesList.add(jobmonitor.logFile);
oldFilesList.add(jobmonitor.idxFile);
jobmonitor.cleanup();
jobmonitor.cleanup(oldFilesList);
}
jobmonitorIter = runningJobMonitorMap.values().iterator();
while (jobmonitorIter.hasNext()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

public class JobResource implements ResourceAbstract {

private static final String NAS_GROOVY_RESOURCE_PATH = "dk/netarkivet/harvester/webinterface/servlet/nas.groovy";

private NASEnvironment environment;

protected int R_JOB = -1;
Expand Down Expand Up @@ -113,7 +115,7 @@ public void frontier_list(HttpServletRequest req, HttpServletResponse resp, List
regex =".*";
}

String resource = "dk/netarkivet/harvester/webinterface/servlet/nas.groovy";
String resource = NAS_GROOVY_RESOURCE_PATH;
InputStream in = JobResource.class.getClassLoader().getResourceAsStream(resource);
ByteArrayOutputStream bOut = new ByteArrayOutputStream();
byte[] tmpArr = new byte[8192];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,20 @@ public class SearchResult implements Pageable {
protected Pattern p;
protected Matcher m;

protected File idxFile;

protected long lastIndex;
protected File srIdxFile;

protected RandomAccessFile idxRaf;

public SearchResult(Heritrix3JobMonitor h3Job, String q) throws IOException {
protected long lastIndex;

/**
 * Creates a search result for the given job and query, backed by its own
 * index file.
 *
 * <p>The index file is named
 * {@code crawllog-<jobId>-<searchResultNr>.idx}; it is opened read/write and
 * truncated to zero length, so any previous content is discarded.
 *
 * @param h3Job the job monitor this search result belongs to; its
 *        {@code jobId} is used in the index file name
 * @param q regular expression to search for, compiled case-insensitively
 * @param searchResultNr number used in the index file name to distinguish
 *        this result's file from those of other results of the same job
 * @throws IOException if the index file cannot be opened or truncated
 * @throws java.util.regex.PatternSyntaxException if {@code q} is not a valid
 *         regular expression
 */
public SearchResult(Heritrix3JobMonitor h3Job, String q, int searchResultNr) throws IOException {
    this.h3Job = h3Job;
    p = Pattern.compile(q, Pattern.CASE_INSENSITIVE);
    // Matcher is created on a placeholder input; presumably it is reset with
    // real input before use -- confirm in update().
    m = p.matcher("42");
    // Open exactly one index file. The superseded "crwawllog-...-1.idx" handle
    // was opened and then overwritten without being closed, leaking it.
    srIdxFile = new File("crawllog-" + h3Job.jobId + "-" + searchResultNr + ".idx");
    idxRaf = new RandomAccessFile(srIdxFile, "rw");
    idxRaf.setLength(0);
    lastIndex = 0;
}

public synchronized void update() throws IOException {
Expand Down Expand Up @@ -80,7 +80,7 @@ public synchronized void update() throws IOException {

/**
 * Returns the current length, in bytes, of this search result's index file
 * ({@code srIdxFile}), as reported by {@link java.io.File#length()}.
 *
 * @return the index file length in bytes
 */
@Override
public long getIndexSize() {
    return srIdxFile.length();
}

@Override
Expand Down

0 comments on commit 86c6ccc

Please sign in to comment.