Skip to content

Commit

Permalink
Added a flag to allow upload failures without causing a job to be marked as FAILED
Browse files Browse the repository at this point in the history
  • Loading branch information
csrster committed Dec 7, 2023
1 parent 630eb93 commit d4dba40
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 3 deletions.
Expand Up @@ -72,7 +72,7 @@ public final class Constants {
* The code will try to read the heritrix version from the pom in the jar Manifest. This
* constant is only ever read as a fallback.
* */
private static final String HERITRIX3_VERSION = "3.4.0-NAS-7.5-SNAPSHOT";
private static final String HERITRIX3_VERSION = "3.4.0-NAS-7.5.1-SNAPSHOT";

/**
* Read this much data when copying data from a file channel. Note that due to a bug in java, this should never be
Expand Down
Expand Up @@ -357,6 +357,7 @@
<!-- Default is 400000000 bytes (~400 Mbytes). -->
<minSpaceLeft>400000000</minSpaceLeft>
<oldjobsDir>oldjobs</oldjobsDir>
<allowUploadFailures>false</allowUploadFailures>
<channel/>
<sendReadyInterval>30</sendReadyInterval>
<sendReadyDelay>300</sendReadyDelay>
Expand Down
Expand Up @@ -276,6 +276,12 @@ public class HarvesterSettings {
* web servers.
*/
public static String CRAWLER_TIMEOUT_NON_RESPONDING = "settings.harvester.harvesting.heritrix.noresponseTimeout";

/**
* <b>settings.harvester.harvesting.allowUploadFailures</b>:<br>
* Settings key for a boolean flag. If true, upload failures will not cause a job to be marked as failed:
* the failure is still logged and the file is still recorded as a failed file, but no error message is
* accumulated for it. The bundled default settings set this flag to false.
*/
public static String ALLOW_UPLOAD_FAILURES = "settings.harvester.harvesting.allowUploadFailures";

/**
* <b>settings.harvester.monitor.refreshInterval</b>:<br>
* Time interval in seconds after which the harvest monitor pages will be automatically refreshed.
Expand Down
Expand Up @@ -98,6 +98,7 @@ National Library.
<!-- Default is 400000000 bytes (~400 Mbytes). -->
<minSpaceLeft>400000000</minSpaceLeft>
<oldjobsDir>oldjobs</oldjobsDir>
<allowUploadFailures>false</allowUploadFailures>
<channel></channel>
<sendReadyInterval>30</sendReadyInterval>
<sendReadyDelay>300</sendReadyDelay>
Expand Down
Expand Up @@ -318,7 +318,11 @@ private void uploadFiles(List<File> files, StringBuilder errorMessage, List<File
File oldJobsDir = new File(Settings.get(HarvesterSettings.HARVEST_CONTROLLER_OLDJOBSDIR));
String errorMsg = "Error uploading file '" + f.getAbsolutePath() + "' Will be moved to the oldjobs directory '"
+ oldJobsDir.getAbsolutePath() + "'";
errorMessage.append(errorMsg).append("\n").append(e.toString()).append("\n");
// Upload errors are non-fatal when the allowUploadFailures setting is true: the error is then only logged, not appended to errorMessage.
boolean allowUploadFailures = Settings.getBoolean(HarvesterSettings.ALLOW_UPLOAD_FAILURES);
if (!allowUploadFailures) {
errorMessage.append(errorMsg).append("\n").append(e.toString()).append("\n");
}
log.warn(errorMsg, e);
failedFiles.add(f);
}
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Expand Up @@ -19,7 +19,7 @@
<webarchive-commons.version>1.1.5</webarchive-commons.version>
<!-- Heritrix versions are from https://github.com/netarchivesuite/heritrix3 which tracks the official
repository at https://github.com/internetarchive/heritrix3 as closely as we can -->
<heritrix3.version>3.4.0-NAS-7.5</heritrix3.version>
<heritrix3.version>3.4.0-NAS-7.5.1-SNAPSHOT</heritrix3.version>
<heritrix3-wrapper.version>1.0.5</heritrix3-wrapper.version>
<wayback.version>1.8.0-20130411</wayback.version>
<openwayback.version>2.0.0</openwayback.version>
Expand Down

0 comments on commit d4dba40

Please sign in to comment.