Skip to content
Permalink
Browse files

Added logging to the comparator.

  • Loading branch information...
csrster committed Feb 16, 2016
1 parent d6bfccd commit 25589e3ad6936201503d594f36b4454c8748584d
@@ -23,16 +23,13 @@
package dk.netarkivet.harvester.scheduler.jobgen;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.antiaction.raptor.dao.AttributeBase;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.harvester.HarvesterSettings;
@@ -71,21 +68,6 @@
/** Is deduplication enabled or disabled in the settings. */
private final boolean DEDUPLICATION_ENABLED = Settings.getBoolean(HarvesterSettings.DEDUPLICATION_ENABLED);

/**
 * Renders a domain configuration as a compact, single-line string intended for log output.
 * <p>
 * The format is
 * {@code cfg{domainName,configName,maxBytes,maxObjects,(id,entity_id,type_id,integer)...}},
 * with one parenthesised group per attribute that is present on the configuration.
 * <p>
 * This helper never fails on missing data: a null configuration yields {@code "cfg{null}"},
 * and a null attribute list, a null list entry, or an entry without an attribute is simply
 * left out of the output.
 *
 * @param cfg the configuration to describe; may be null
 * @return the string representation described above, never null
 */
public static String cfgToString(DomainConfiguration cfg) {
    if (cfg == null) {
        return "cfg{null}";
    }
    // Accumulate in a StringBuilder so that each attribute does not re-copy the whole string.
    StringBuilder result = new StringBuilder("cfg{" + cfg.getDomainName() + "," + cfg.getName() + ","
            + cfg.getMaxBytes() + "," + cfg.getMaxObjects() + ",");
    // A logging helper must not throw: tolerate a configuration without an attribute list.
    if (cfg.getAttributesAndTypes() != null) {
        for (EAV.AttributeAndType aat : cfg.getAttributesAndTypes()) {
            AttributeBase ab = (aat == null) ? null : aat.attribute;
            if (ab != null) {
                result.append("(" + ab.id + "," + ab.entity_id + "," + ab.type_id + "," + ab.getInteger() + ")");
            }
        }
    }
    result.append("}");
    return result.toString();
}

public static <T> void chunk(List<T> inputList, Comparator<T> comparator) {
log.debug("Chunking list of length {}.", inputList.size());
List<List<T>> chunks = new ArrayList<>();
@@ -103,7 +85,7 @@ public static String cfgToString(DomainConfiguration cfg) {
chunks.add(newList);
if (input instanceof DomainConfiguration) {
log.debug("Creating chunk number {} with config {}.", chunks.size(),
cfgToString((DomainConfiguration) input));
DomainConfiguration.cfgToString((DomainConfiguration) input));
}
}
}
@@ -134,14 +116,14 @@ public int generateJobs(HarvestDefinition harvest) {
log.trace("Sorting domains with instance of " + domainConfigurationSubsetComparator.getClass().getName());
log.debug("Before Sorting:");
for (DomainConfiguration dc: subset) {
log.debug(cfgToString(dc));
log.debug(DomainConfiguration.cfgToString(dc));
}
// Don't really need to sort here - just group those which are equal under the comparator.
// Collections.sort(subset, domainConfigurationSubsetComparator);
chunk(subset, domainConfigurationSubsetComparator);
log.debug("After Sorting:");
for (DomainConfiguration dc: subset) {
log.debug(cfgToString(dc));
log.debug(DomainConfiguration.cfgToString(dc));
}
log.trace("{} domainconfigs now sorted and ready to processing for harvest #{}", subset.size(),
harvest.getOid());
@@ -297,7 +279,7 @@ private boolean checkAddDomainConfInvariant(Job job, DomainConfiguration cfg, Do

if (previousCfg != null && EAV.compare2(cfg.getAttributesAndTypes(), previousCfg.getAttributesAndTypes())!=0 ) {
log.debug("Attributes have changed between configurations {} and {}",
cfgToString(previousCfg), cfgToString(cfg));
DomainConfiguration.cfgToString(previousCfg), DomainConfiguration.cfgToString(cfg));
return false;
}

@@ -161,7 +161,7 @@ protected int processDomainConfigurationSubset(HarvestDefinition harvest,
DomainConfiguration previousDomainConf = null;
while (domainConfSubset.hasNext()) {
DomainConfiguration cfg = domainConfSubset.next();
log.trace("Processing " + cfgToString(cfg));
log.trace("Processing " + DomainConfiguration.cfgToString(cfg));
if (EXCLUDE_ZERO_BUDGET && (0 == cfg.getMaxBytes() || 0 == cfg.getMaxObjects())) {
log.info("Config '{}' for '{}'" + " excluded (0{})", cfg.getName(), cfg.getDomainName(),
(cfg.getMaxBytes() == 0 ? " bytes" : " objects"));
@@ -29,12 +29,15 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.antiaction.raptor.dao.AttributeBase;

import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.PermissionDenied;
import dk.netarkivet.common.exceptions.UnknownID;
import dk.netarkivet.common.utils.Named;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.harvester.HarvesterSettings;
import dk.netarkivet.harvester.datamodel.eav.EAV;
import dk.netarkivet.harvester.datamodel.eav.EAV.AttributeAndType;

/**
@@ -129,6 +132,21 @@ public DomainConfiguration(String theConfigName, String domainName, DomainHistor
this.maxBytes = Constants.DEFAULT_MAX_BYTES;
}

/**
 * Renders a domain configuration as a compact, single-line string intended for log output.
 * <p>
 * The format is
 * {@code cfg{domainName,configName,maxBytes,maxObjects,(id,entity_id,type_id,integer)...}},
 * with one parenthesised group per attribute that is present on the configuration.
 * <p>
 * This helper never fails on missing data: a null configuration yields {@code "cfg{null}"},
 * and a null attribute list, a null list entry, or an entry without an attribute is simply
 * left out of the output.
 *
 * @param cfg the configuration to describe; may be null
 * @return the string representation described above, never null
 */
public static String cfgToString(DomainConfiguration cfg) {
    if (cfg == null) {
        return "cfg{null}";
    }
    // Accumulate in a StringBuilder so that each attribute does not re-copy the whole string.
    StringBuilder result = new StringBuilder("cfg{" + cfg.getDomainName() + "," + cfg.getName() + ","
            + cfg.getMaxBytes() + "," + cfg.getMaxObjects() + ",");
    // A logging helper must not throw: tolerate a configuration without an attribute list.
    if (cfg.getAttributesAndTypes() != null) {
        for (AttributeAndType aat : cfg.getAttributesAndTypes()) {
            AttributeBase ab = (aat == null) ? null : aat.attribute;
            if (ab != null) {
                result.append("(" + ab.id + "," + ab.entity_id + "," + ab.type_id + "," + ab.getInteger() + ")");
            }
        }
    }
    result.append("}");
    return result.toString();
}

/**
* Specify the name of the order.xml template to use.
*
@@ -332,7 +350,7 @@ public long getExpectedNumberOfObjects(long objectLimit, long byteLimit) {
long prevresultfactor = Settings.getLong(HarvesterSettings.ERRORFACTOR_PERMITTED_PREVRESULT);
HarvestInfo best = DomainHistory.getBestHarvestInfoExpectation(configName, this.domainhistory);

log.trace("Using domain info '{}' for configuration '{}'", best, toString());
log.trace("Getting expectation, using domain info '{}' for configuration '{}'", best, cfgToString(this));

long expectedObjectSize = getExpectedBytesPerObject(best);
// The maximum number of objects that the maxBytes or MAX_DOMAIN_SIZE
@@ -346,13 +364,15 @@ public long getExpectedNumberOfObjects(long objectLimit, long byteLimit) {
} else {
maximum = Settings.getLong(HarvesterSettings.MAX_DOMAIN_SIZE);
}
log.trace("Initial maximum: {}", maximum);
// get last number of objects harvested
long minimum;
if (best != null) {
minimum = best.getCountObjectRetrieved();
} else {
minimum = NumberUtils.minInf(Constants.HERITRIX_MAXOBJECTS_INFINITY, maxObjects);
}
log.trace("Initial minimum: {}", minimum);
// Calculate the expected number of objects we will harvest.
long expectation;
if (best != null) {
@@ -371,16 +391,19 @@ public long getExpectedNumberOfObjects(long objectLimit, long byteLimit) {
// limit
expectation = NumberUtils.minInf(Settings.getLong(HarvesterSettings.MAX_DOMAIN_SIZE), maxObjects);
}
log.trace("Initial expectation: {}", expectation);
// Always limit to domain specifics if set to do so. We always expect
// to actually hit this limit
if ((maxObjects > Constants.HERITRIX_MAXOBJECTS_INFINITY && maximum > maxObjects)
|| (maxBytes > Constants.HERITRIX_MAXBYTES_INFINITY && maximum > maxBytes / expectedObjectSize)) {
log.trace("Using domain limits for {}", cfgToString(this));
maximum = minObjectsBytesLimit(maxObjects, maxBytes, expectedObjectSize);
log.trace("New maximum: {}", maximum);
}
// Never return more than allowed maximum
expectation = Math.min(expectation, maximum);

log.trace("Expected number of objects for configuration '{}' is {}", toString(), expectation);
log.trace("Expected number of objects for configuration '{}' is {}", cfgToString(this), expectation);

return expectation;
}

0 comments on commit 25589e3

Please sign in to comment.
You can’t perform that action at this time.