Skip to content

Commit

Permalink
H3 jobs overview changed to show jobs by channel.
Browse files Browse the repository at this point in the history
A config GUI page has been added to enable/disable monitoring on a per host:port basis using Regex.
H3 job page in NAS GUI has more info displayed.
H3 scripting console added to NAS GUI.
SiteSection.java violated to support Servlet Mappings.
Added tempPath setting for controlling the temporary location of cached crawllogs.
Icon is now only shown in the running jobs overview when the H3 host is known.
H3 username and password from settings now used.
  • Loading branch information
nclarkekb committed Oct 30, 2016
1 parent 925b285 commit b917c15
Show file tree
Hide file tree
Showing 27 changed files with 10,524 additions and 242 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -152,10 +152,18 @@ public void generateNavigationTree(JspWriter out, String url, Locale locale) thr
out.print("<td>&nbsp; &nbsp; <a href=\"/" + HTMLUtils.encode(dirname) + "/"
+ HTMLUtils.encode(pageAndTitle.getKey()) + "\"> "
+ HTMLUtils.escapeHtmlValues(I18n.getString(bundle, locale, pageAndTitle.getValue()))
+ "</a></td>\n");
out.print("</tr>");
+ "</a></td>");
out.print("</tr>\n");
i++;
}
if (this.getClass().getName().equalsIgnoreCase("dk.netarkivet.harvester.webinterface.HistorySiteSection")) {
out.print("<tr>");
out.print("<td>&nbsp; &nbsp; <a href=\"/" + HTMLUtils.encode(dirname) + "/"
+ HTMLUtils.encode("history") + "/\"> "
+ HTMLUtils.escapeHtmlValues(I18n.getString(bundle, locale, "H3 remote access"))
+ "</a></td>");
out.print("</tr>\n");
}
}
}

Expand Down
2 changes: 1 addition & 1 deletion harvester/harvester-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@
<dependency>
<groupId>com.antiaction</groupId>
<artifactId>common-template-engine</artifactId>
<version>0.2.0-NAS</version>
<version>0.3.0-NAS</version>
</dependency>

<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ public class HarvesterSettings {
* 'startNewFilesOnCheckpoint' setting in the Heritrix WARCWriterProcessor. Only available with H3. The default is true.
*/
public static String HERITRIX_WARC_START_NEW_FILES_ON_CHECKPOINT
= "settings.harvester.harvesting.heritrix.warc.startNewFilesOnCheckpoint";
= "settings.harvester.harvesting.heritrix.warc.startNewFilesOnCheckpoint";

/**
* Currently UNUSED.
Expand Down Expand Up @@ -806,4 +806,6 @@ public class HarvesterSettings {
*/
public static String HERITRIX3_CERTIFICATE_PASSWORD = "settings.harvester.harvesting.heritrix3.certificatePassword";

public static String HERITRIX3_MONITOR_TEMP_PATH = "settings.harvester.harvesting.monitor.tempPath";

}
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,22 @@ public class HistorySiteSection extends SiteSection {
* Create a new history SiteSection object.
*/
public HistorySiteSection() {
super("sitesection;history", "Harveststatus", 3, new String[][] { {"alljobs", "pagetitle;all.jobs"},
{"deprecatedperdomain", "pagetitle;all.jobs.per.domain"}, {"running", "pagetitle;all.jobs.running"},
{"running-jobdetails", "pagetitle;running.job.details"},
{"perhd", "pagetitle;all.jobs.per.harvestdefinition"},
{"perharvestrun", "pagetitle;all.jobs.per.harvestrun"}, {"jobdetails", "pagetitle;details.for.job"},
{"perdomain", "pagetitle;all.jobs.per.domain"}, {"seeds", "pagetitle;seeds.for.harvestdefinition"}},
"History", dk.netarkivet.harvester.Constants.TRANSLATIONS_BUNDLE);
super("sitesection;history",
"Harveststatus",
3,
new String[][] {
{"alljobs", "pagetitle;all.jobs"},
{"deprecatedperdomain", "pagetitle;all.jobs.per.domain"},
{"running", "pagetitle;all.jobs.running"},
{"running-jobdetails", "pagetitle;running.job.details"},
{"perhd", "pagetitle;all.jobs.per.harvestdefinition"},
{"perharvestrun", "pagetitle;all.jobs.per.harvestrun"},
{"jobdetails", "pagetitle;details.for.job"},
{"perdomain", "pagetitle;all.jobs.per.domain"},
{"seeds", "pagetitle;seeds.for.harvestdefinition"}
},
"History", dk.netarkivet.harvester.Constants.TRANSLATIONS_BUNDLE
);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,12 @@

public class Heritrix3JobMonitor implements Pageable {

protected NASEnvironment environment;

public boolean bActive = true;

public boolean bPull = false;

public boolean bInitialized;

public long jobId;
Expand All @@ -29,6 +33,8 @@ public class Heritrix3JobMonitor implements Pageable {

public Heritrix3Wrapper h3wrapper;

public String h3HostnamePort;

public String hostUrl;

public String jobname;
Expand All @@ -50,11 +56,12 @@ public class Heritrix3JobMonitor implements Pageable {
/**
 * Non-public constructor that performs no initialisation itself.
 * The static {@code getInstance} factory creates the object, assigns its
 * fields and then calls {@code init()}.
 */
protected Heritrix3JobMonitor() {
}

public static Heritrix3JobMonitor getInstance(Long jobId) throws IOException {
public static Heritrix3JobMonitor getInstance(Long jobId, NASEnvironment environment) throws IOException {
Heritrix3JobMonitor jobmonitor = new Heritrix3JobMonitor();
jobmonitor.environment = environment;
jobmonitor.jobId = jobId;
jobmonitor.logFile = new File("crawllog-" + jobId + ".log");
jobmonitor.idxFile = new File("crawllog-" + jobId + ".idx");
jobmonitor.logFile = new File(environment.tempPath, "crawllog-" + jobId + ".log");
jobmonitor.idxFile = new File(environment.tempPath, "crawllog-" + jobId + ".idx");
jobmonitor.init();
return jobmonitor;
}
Expand All @@ -69,7 +76,7 @@ public synchronized void init() throws IOException {
if (startedInfo != null) {
hostUrl = startedInfo.getHostUrl();
if (hostUrl != null && hostUrl.length() > 0) {
h3wrapper = Heritrix3WrapperManager.getHeritrix3Wrapper(hostUrl);
h3wrapper = Heritrix3WrapperManager.getHeritrix3Wrapper(hostUrl, environment.h3AdminName, environment.h3AdminPassword);
}
}
}
Expand All @@ -91,6 +98,21 @@ public synchronized void init() throws IOException {
}
}

/**
 * Refreshes the cached {@code job} and {@code jobResult} of this monitor.
 * <p>
 * Each value is only refreshed when it has already been populated, and a
 * cached value is only replaced when the re-read yields a non-null result,
 * so a failed lookup never discards the last known state.
 *
 * @throws IOException declared for callers; propagated from the underlying lookups if they raise it
 */
public synchronized void update() throws IOException {
    if (job != null) {
        // Read into a temporary only. Assigning straight to the field would
        // overwrite the cached job with null before the null check runs.
        Job tmpJob = Heritrix3JobMonitorThread.jobDAO.read(jobId);
        if (tmpJob != null) {
            job = tmpJob;
        }
    }
    // NOTE(review): assumes h3wrapper is non-null whenever jobResult has been set - confirm against init().
    if (jobResult != null && jobResult.job != null && jobname != null) {
        JobResult tmpJobResult = h3wrapper.job(jobname);
        if (tmpJobResult != null) {
            jobResult = tmpJobResult;
        }
    }
}

public synchronized void updateCrawlLog(byte[] tmpBuf) throws IOException {
long pos;
long to;
Expand Down Expand Up @@ -122,6 +144,7 @@ public synchronized void updateCrawlLog(byte[] tmpBuf) throws IOException {
--read;
if (tmpBuf[idx++] == '\n') {
idxRaf.writeLong(pos);
lastIndexed = pos;
}
}
}
Expand All @@ -148,6 +171,11 @@ public synchronized long getIndexSize() {
return idxFile.length();
}

/**
 * Returns the most recent crawl-log position recorded in the index,
 * i.e. the value last stored in {@code lastIndexed}.
 *
 * @return the last indexed position
 */
@Override
public long getLastIndexed() {
    return this.lastIndexed;
}

@Override
public synchronized byte[] readPage(long page, long itemsPerPage, boolean descending) throws IOException {
return StringIndexFile.readPage(idxRaf, logRaf, page, itemsPerPage, descending);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,21 @@
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

import org.netarchivesuite.heritrix3wrapper.Heritrix3Wrapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.harvester.datamodel.HarvestChannelDAO;
import dk.netarkivet.harvester.datamodel.JobDAO;
import dk.netarkivet.harvester.datamodel.RunningJobsInfoDAO;
import dk.netarkivet.harvester.harvesting.monitor.HarvestMonitor;
Expand All @@ -30,10 +34,13 @@ public class Heritrix3JobMonitorThread implements Runnable {

protected static RunningJobsInfoDAO runningJobsInfoDAO;

protected static HarvestChannelDAO harvestChannelDAO;

// Obtain the harvest monitor and the DAO instances once, when this class is loaded,
// and keep them in the static fields used by the monitoring code.
static {
harvestMonitor = HarvestMonitor.getInstance();
jobDAO = JobDAO.getInstance();
runningJobsInfoDAO = RunningJobsInfoDAO.getInstance();
harvestChannelDAO = HarvestChannelDAO.getInstance();
}

public Thread thread;
Expand All @@ -44,6 +51,14 @@ public class Heritrix3JobMonitorThread implements Runnable {

public Map<Long, Heritrix3JobMonitor> filterJobMonitorMap = new TreeMap<Long, Heritrix3JobMonitor>();

public Set<Heritrix3Wrapper> h3WrapperSet = new HashSet<Heritrix3Wrapper>();

public Set<String> h3HostPortSet = new HashSet<String>();

public List<String> h3HostnamePortEnabledList = new ArrayList<String>();

public List<String> h3HostnamePortDisabledList = new ArrayList<String>();

public void start() {
thread = new Thread(this, "Heritrix3 Job Monitor Thread");
thread.start();
Expand All @@ -53,12 +68,12 @@ public void start() {
public void run() {
Map<Long, Heritrix3JobMonitor> tmpJobMonitorMap;
Iterator<Heritrix3JobMonitor> jobmonitorIter;
byte[] tmpBuf = new byte[1024*1024];
byte[] tmpBuf = new byte[1024 * 1024];
try {
LOG.info("CrawlLog Thread started.");

//File tmpFolder = new File("/tmp/");
File tmpFolder = new File(".");
File tmpFolder = HistoryServlet.environment.tempPath;;
File[] oldFiles = tmpFolder.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
Expand Down Expand Up @@ -91,7 +106,8 @@ public boolean accept(File dir, String name) {
jobmonitor = runningJobMonitorMap.remove(jobId);
if (jobmonitor == null) {
try {
jobmonitor = Heritrix3WrapperManager.getJobMonitor(jobId);
// New H3 job.
jobmonitor = Heritrix3WrapperManager.getJobMonitor(jobId, HistoryServlet.environment);
} catch (IOException e) {
}
}
Expand All @@ -113,7 +129,14 @@ public boolean accept(File dir, String name) {
oldFilesMap.remove(jobmonitor.logFile.getName());
oldFilesMap.remove(jobmonitor.idxFile.getName());
}
jobmonitor.updateCrawlLog(tmpBuf);
if (!jobmonitor.bInitialized) {
jobmonitor.init();
}
checkH3HostnamePort(jobmonitor);
isH3HostnamePortEnabled(jobmonitor);
if (jobmonitor.bPull) {
jobmonitor.updateCrawlLog(tmpBuf);
}
}
if (oldFilesMap != null) {
oldFilesList.addAll(oldFilesMap.values());
Expand Down Expand Up @@ -154,4 +177,52 @@ public List<Heritrix3JobMonitor> getRunningH3Jobs() {
return h3JobsList;
}

/**
 * Assigns the {@code "hostname:port"} identifier of the monitor's H3 wrapper
 * to the monitor, if it has not been assigned yet and a wrapper is available.
 * <p>
 * When the identifier has not been seen before it is added to
 * {@code h3HostPortSet} and the enabled/disabled filter lists are rebuilt.
 *
 * @param jobmonitor the job monitor to inspect and update
 */
public void checkH3HostnamePort(Heritrix3JobMonitor jobmonitor) {
    Heritrix3Wrapper wrapper = jobmonitor.h3wrapper;
    if (jobmonitor.h3HostnamePort != null || wrapper == null) {
        // Already resolved, or nothing to resolve it from yet.
        return;
    }
    synchronized (h3HostPortSet) {
        String hostnamePort = wrapper.hostname + ":" + wrapper.port;
        jobmonitor.h3HostnamePort = hostnamePort;
        // Set.add reports whether the element was newly inserted.
        if (h3HostPortSet.add(hostnamePort)) {
            updateH3HostnamePortFilter();
        }
    }
}

/**
 * Updates the monitor's {@code bPull} flag according to whether its
 * {@code h3HostnamePort} is present in the enabled list.
 *
 * @param jobmonitor the job monitor whose flag is updated
 * @return the monitor's {@code bPull} value after the update
 */
public boolean isH3HostnamePortEnabled(Heritrix3JobMonitor jobmonitor) {
    synchronized (h3HostnamePortEnabledList) {
        // TODO Not ideal to do contains on a list, but it's fairly short.
        final String hostnamePort = jobmonitor.h3HostnamePort;
        jobmonitor.bPull = h3HostnamePortEnabledList.contains(hostnamePort);
    }
    return jobmonitor.bPull;
}

/**
 * Rebuilds the enabled and disabled host:port lists from {@code h3HostPortSet}.
 * <p>
 * Every known host:port is classified through
 * {@code HistoryServlet.environment.isH3HostnamePortEnabled}. The two
 * published lists are then cleared, refilled and sorted, each under its own lock.
 */
public void updateH3HostnamePortFilter() {
    List<String> enabled = new LinkedList<String>();
    List<String> disabled = new LinkedList<String>();

    // Classify a consistent snapshot of the known host:port values.
    synchronized (h3HostPortSet) {
        for (String hostnamePort : h3HostPortSet) {
            List<String> target = HistoryServlet.environment.isH3HostnamePortEnabled(hostnamePort)
                    ? enabled
                    : disabled;
            target.add(hostnamePort);
        }
    }

    synchronized (h3HostnamePortEnabledList) {
        h3HostnamePortEnabledList.clear();
        h3HostnamePortEnabledList.addAll(enabled);
        Collections.sort(h3HostnamePortEnabledList);
    }

    synchronized (h3HostnamePortDisabledList) {
        h3HostnamePortDisabledList.clear();
        h3HostnamePortDisabledList.addAll(disabled);
        Collections.sort(h3HostnamePortDisabledList);
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ protected Heritrix3WrapperManager() {

public static Map<String, Heritrix3Wrapper> h3wrapperMap = new HashMap<String, Heritrix3Wrapper>();

public static Heritrix3Wrapper getHeritrix3Wrapper(String h3EngineUrl) {
public static Heritrix3Wrapper getHeritrix3Wrapper(String h3EngineUrl, String username, String password) {
Heritrix3Wrapper h3wrapper = null;
if (h3EngineUrl != null) {
synchronized (h3wrapperMap) {
Expand All @@ -35,7 +35,7 @@ public static Heritrix3Wrapper getHeritrix3Wrapper(String h3EngineUrl) {
port = 80;
}
}
h3wrapper = Heritrix3Wrapper.getInstance(hostname, port, null, null, "admin", "adminPassword");
h3wrapper = Heritrix3Wrapper.getInstance(hostname, port, null, null, username, password);
h3wrapperMap.put(h3EngineUrl, h3wrapper);
}
}
Expand Down Expand Up @@ -75,12 +75,12 @@ public static String getJobname(Heritrix3Wrapper h3wrapper, long jobId) {

public static Map<Long, Heritrix3JobMonitor> h3JobmonitorMap = new TreeMap<Long, Heritrix3JobMonitor>();

public static Heritrix3JobMonitor getJobMonitor(long jobId) throws IOException {
public static Heritrix3JobMonitor getJobMonitor(long jobId, NASEnvironment environment) throws IOException {
Heritrix3JobMonitor jobmonitor;
synchronized (h3JobmonitorMap) {
jobmonitor = h3JobmonitorMap.get(jobId);
if (jobmonitor == null) {
jobmonitor = Heritrix3JobMonitor.getInstance(jobId);
jobmonitor = Heritrix3JobMonitor.getInstance(jobId, environment);
h3JobmonitorMap.put(jobId, jobmonitor);
}
}
Expand Down
Loading

0 comments on commit b917c15

Please sign in to comment.