Skip to content

Commit

Permalink
Fixed NAS-2480 - don't insert empty traps in our Heritrix templates
Browse files (browse the repository at this point in the history)
  • Loading branch information
svcarlsen committed Mar 23, 2016
1 parent 6cae709 commit 4eacf59
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,7 @@ public void setCrawlerTraps(List<String> regExps, boolean strictMode) {
}
}
crawlerTraps = Collections.unmodifiableList(cleanedListOfCrawlerTraps);
+ log.debug("Domain {} has {} crawlertraps", domainName, crawlerTraps.size());
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -773,8 +773,16 @@ protected synchronized Domain readKnown(Connection c, String domainName) {
Domain d = new Domain(domainName);
d.setComments(comments);
// don't throw exception if illegal regexps are found.
- boolean strictMode = false;
- d.setCrawlerTraps(Arrays.asList(crawlertraps.split("\n")), strictMode);
+ boolean strictMode = false;
+ String[] traps = crawlertraps.split("\n");
+ List<String> insertList = new ArrayList<String>();
+ for (String trap: traps) {
+ if (!trap.isEmpty()) { // Ignore empty traps (NAS-2480)
+ insertList.add(trap);
+ }
+ }
+ log.debug("Found {} crawlertraps for domain '{}' in database", insertList.size(), domainName);
+ d.setCrawlerTraps(insertList, strictMode);
d.setID(domainId);
d.setEdition(edition);
if (alias != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,14 @@ public void setIsActive(boolean isActive) {
public void editOrderXMLAddPerDomainCrawlerTraps(DomainConfiguration cfg) {
List<String> crawlerTraps = cfg.getCrawlertraps();
String elementName = cfg.getDomainName();
- if (!crawlerTraps.isEmpty()) {
+ int trapCount=crawlerTraps.size();
+ for (String trap: crawlerTraps){
+ if (trap.isEmpty()) { // Ignore empty traps in the trapcount (NAS-2480)
+ log.warn("Found empty trap for domain", cfg.getDomainName());
+ trapCount--;
+ }
+ }
+ if (trapCount > 0) {
log.info("Inserting {} crawlertraps for domain '{}' into the template", crawlerTraps.size(), elementName);
insertCrawlerTraps(elementName, crawlerTraps);
}
Expand Down

0 comments on commit 4eacf59

Please sign in to comment.