Skip to content

Commit

Permalink
Work on NAS-2380
Browse files Browse the repository at this point in the history
  • Loading branch information
svcarlsen committed May 21, 2015
1 parent 08d6df4 commit 6b6c51b
Show file tree
Hide file tree
Showing 14 changed files with 47 additions and 392 deletions.
Expand Up @@ -32,9 +32,9 @@
import java.util.List;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.archive.io.arc.ARCRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.CommonSettings;
import dk.netarkivet.common.Constants;
Expand Down Expand Up @@ -72,7 +72,8 @@ public class LocalCDXCache implements JobIndexCache {
private static final String SUFFIX = "-index.cdx";

private final ViewerArcRepositoryClient arcRepos;
private Log log = LogFactory.getLog(LocalCDXCache.class.getName());

private final Logger log = LoggerFactory.getLogger(LocalCDXCache.class);
private static final String WORK_SUFFIX = ".unsorted";

/**
Expand Down Expand Up @@ -233,7 +234,7 @@ public void processRecord(ARCRecord record, OutputStream os) {
/**
 * Is called when batch job is finished. Nothing to do.
 *
 * @param os output stream for returning output from batchjob.
 */
public void finish(OutputStream os) {
    // Intentionally empty: there is nothing to do when the batch job finishes.
}
Expand Down
Expand Up @@ -25,8 +25,8 @@
import java.util.LinkedList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.distribute.JMSConnectionFactory;
import dk.netarkivet.common.exceptions.HarvestingAbort;
Expand All @@ -48,7 +48,7 @@
public class FrontierReportAnalyzer implements Runnable {

/** The logger to use. */
static final Log LOG = LogFactory.getLog(FrontierReportAnalyzer.class);
static final Logger LOG = LoggerFactory.getLogger(FrontierReportAnalyzer.class);
/** The controller used to communicate with the Heritrix instance. */
private final BnfHeritrixController heritrixController;
/** The last time this Analyzer was executed. */
Expand Down
Expand Up @@ -60,7 +60,6 @@
import dk.netarkivet.harvester.HarvesterSettings;
import dk.netarkivet.harvester.datamodel.H1HeritrixTemplate;
import dk.netarkivet.harvester.datamodel.HeritrixTemplate;
import dk.netarkivet.harvester.harvesting.controller.DirectHeritrixController;
import dk.netarkivet.testutils.TestResourceUtils;
import dk.netarkivet.testutils.XmlAsserts;
import dk.netarkivet.testutils.preconfigured.MoveTestFiles;
Expand Down Expand Up @@ -123,8 +122,7 @@ private HeritrixLauncher getHeritrixLauncher(File origOrderXml, File indexDir) {
File seedsTxt = new File(crawlDir, "seeds.txt");
FileUtils.copyFile(origOrderXml, orderXml);
FileUtils.copyFile(origSeeds, seedsTxt);
//FIXME assumes H1 HeritrixFiles


HeritrixFiles files = HeritrixFiles.getH1HeritrixFilesWithDefaultJmxFiles(crawlDir,
new JobInfoTestImpl(Long.parseLong(Heritrix1ControllerTestInfo.ARC_JOB_ID),
Long.parseLong(Heritrix1ControllerTestInfo.ARC_HARVEST_ID)));
Expand Down Expand Up @@ -187,7 +185,6 @@ protected void assertNoUrlsInCrawlLog(String[] urls) throws IOException {
@Test
public void testStartMissingOrderFile() {
try {
//FIXME assumes H1 HeritrixFiles
HeritrixFiles hf = HeritrixFiles.getH1HeritrixFilesWithDefaultJmxFiles(
mtf.newTmpDir(), new JobInfoTestImpl(42L, 42L));
HeritrixLauncherFactory.getInstance(hf);
Expand All @@ -203,7 +200,6 @@ public void testStartMissingOrderFile() {
@Test
public void testStartMissingSeedsFile() {
try {
//FIXME assumes H1 HeritrixFiles
HeritrixFiles hf = HeritrixFiles.getH1HeritrixFilesWithDefaultJmxFiles(WORKING_DIR, new JobInfoTestImpl(42L, 42L));
hf.getSeedsTxtFile().delete();
HeritrixLauncherFactory.getInstance(hf);
Expand Down Expand Up @@ -274,7 +270,7 @@ private void myTesterOfBadOrderfiles(File orderfile) {
// expected case since a searched node could not be found in the bad
// XML-order-file!
} catch (ArgumentNotValid e) {
// Expected case since a templatethat is not H1 or H3 throws an exception!
// Expected case since a template that is not H1 or H3 throws an exception!
}
}

Expand Down Expand Up @@ -489,57 +485,4 @@ public void testFailDuringCrawl() {
"dk.netarkivet.harvester.harvesting.JMXHeritrixController");

}

/**
 * A class that closely emulates CrawlController, except it never starts Heritrix.
 * <p>
 * Instead of crawling, {@link #requestCrawlStart()} spawns a thread that immediately reports the crawl as ending
 * and ended to every registered listener.
 */
public static class TestCrawlController extends DirectHeritrixController {
    private static final long serialVersionUID = 1L;

    /**
     * List of crawl status listeners.
     * <p>
     * This is a plain {@link ArrayList}; every access (adding as well as iterating/copying) must synchronize on
     * this object to avoid concurrent modification exceptions and to make additions visible across threads.
     */
    private final List<CrawlStatusListener> listeners = new ArrayList<CrawlStatusListener>();

    /**
     * Creates the fake controller.
     *
     * @param files the Heritrix files handed on to the superclass constructor
     */
    public TestCrawlController(HeritrixFiles files) {
        super(files);
    }

    /**
     * Register for CrawlStatus events.
     *
     * @param cl a class implementing the CrawlStatusListener interface
     * @see CrawlStatusListener
     */
    @Override
    public void addCrawlStatusListener(CrawlStatusListener cl) {
        synchronized (this.listeners) {
            this.listeners.add(cl);
        }
    }

    /**
     * Operator requested crawl begin. No crawl is started; a new thread notifies all listeners registered at the
     * time it runs that the crawl is ending and has ended.
     */
    @Override
    public void requestCrawlStart() {
        new Thread() {
            @Override
            public void run() {
                // Copy under the lock so the iteration cannot race with addCrawlStatusListener,
                // then notify outside the lock so listener callbacks never run while it is held.
                final List<CrawlStatusListener> snapshot;
                synchronized (listeners) {
                    snapshot = new ArrayList<CrawlStatusListener>(listeners);
                }
                for (CrawlStatusListener l : snapshot) {
                    l.crawlEnding("Fake over");
                    l.crawlEnded("Fake all over");
                }
            }
        }.start();
    }

    /**
     * Starting from nothing, set up CrawlController and associated classes to be ready for a first crawl.
     * This fake does nothing here.
     *
     * @param sH the settings handler (ignored)
     * @throws InitializationException never thrown by this implementation
     */
    public void initialize(SettingsHandler sH) throws InitializationException {}

    /**
     * Request that the crawl stops. This fake ignores the request.
     *
     * @param test the stop message (ignored)
     */
    @Override
    public void requestCrawlStop(String test) {}
}
}

0 comments on commit 6b6c51b

Please sign in to comment.