diff --git a/common/common-core/src/main/java/is/hi/bok/deduplicator/DigestIndexer.java b/common/common-core/src/main/java/is/hi/bok/deduplicator/DigestIndexer.java index ef0a8ee3ef..0a72245593 100644 --- a/common/common-core/src/main/java/is/hi/bok/deduplicator/DigestIndexer.java +++ b/common/common-core/src/main/java/is/hi/bok/deduplicator/DigestIndexer.java @@ -218,9 +218,10 @@ public long writeToIndex(CrawlDataIterator dataIt, String mimefilter, boolean bl } /** - * @param item + * Create Lucene Document for given CrawlDataItem. + * @param item A CrawlDataItem * @param defaultOrigin - * @return + * @return Lucene Document for the given CrawlDataItem */ private Document createDocument(CrawlDataItem item, String defaultOrigin) { Document doc = new Document(); diff --git a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/DatabaseTestUtils.java b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/DatabaseTestUtils.java index 28f6f7d0c2..8bbd05bfc8 100644 --- a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/DatabaseTestUtils.java +++ b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/DatabaseTestUtils.java @@ -46,7 +46,8 @@ import dk.netarkivet.common.utils.Settings; /** - * Utilities to allow testing databases. //FIXME: Rename without Test as these are not specifically test related. + * Utilities to allow testing databases. + * FIXME: Rename without Test as these are not specifically test related. */ public class DatabaseTestUtils { @@ -60,7 +61,7 @@ public class DatabaseTestUtils { * * @param resourcePath A file that contains a test database. 
* @param dbCreationDir - * @return a connection to the database stored in the given file + * */ public static void createDatabase(String resourcePath, String dbname, File dbCreationDir) throws Exception { Settings.set(CommonSettings.DB_MACHINE, ""); @@ -131,7 +132,6 @@ private static void applyStatementsInInputStream(Connection connection, InputStr * * @param resourcePath A file that contains a test database. * @param dbCreationDir - * @return a connection to the database stored in the given file */ public static void createDatabase(String resourcePath, File dbCreationDir) throws Exception { createDatabase(resourcePath, "derivenamefromresource", dbCreationDir); @@ -143,7 +143,6 @@ public static void createDatabase(String resourcePath, File dbCreationDir) throw * * @param resourcePath Location of the sql files to create and populate the test DB. * @param dbCreationDir - * @return a connection to the given sample harvest definition database */ public static void createHDDB(String resourcePath, String dbname, File dbCreationDir) throws Exception { createDatabase(resourcePath, dbname, dbCreationDir); diff --git a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/H3HeritrixTemplate.java b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/H3HeritrixTemplate.java index dafc413eba..c265891e3f 100644 --- a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/H3HeritrixTemplate.java +++ b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/H3HeritrixTemplate.java @@ -102,10 +102,9 @@ public class H3HeritrixTemplate extends HeritrixTemplate implements Serializable /** * Constructor for HeritrixTemplate class. * - * @param doc the order.xml - * @param verify If true, verifies if the given dom4j Document contains the elements required by our software. - * @throws ArgumentNotValid if doc is null, or verify is true and doc does not obey the constraints required by our - * software. 
+ * @param template_id The persistent id of the template in the database + * @param template The template as String object + * @throws ArgumentNotValid if template is null. */ public H3HeritrixTemplate(long template_id, String template) { ArgumentNotValid.checkNotNull(template, "String template"); @@ -264,7 +263,7 @@ public void configureQuotaEnforcer(boolean maxObjectsIsSetByQuotaEnforcer, * Make sure that Heritrix will archive its data in the chosen archiveFormat. * * @param archiveFormat the chosen archiveformat ('arc' or 'warc' supported) - * @throw ArgumentNotValid If the chosen archiveFormat is not supported. + * @throws ArgumentNotValid If the chosen archiveFormat is not supported. */ @Override public void setArchiveFormat(String archiveFormat) { diff --git a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/HeritrixTemplate.java b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/HeritrixTemplate.java index c888e33032..bb1efb8505 100644 --- a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/HeritrixTemplate.java +++ b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/HeritrixTemplate.java @@ -124,9 +124,8 @@ public void setIsActive(boolean isActive) { * global traps. * * @param elementName The name of the added element. - * @param crawlerTraps A list of crawler trap regular expressions to add to this job. + * @param crawlertraps A list of crawler trap regular expressions to add to this job. */ - public abstract void insertCrawlerTraps(String elementName, List crawlertraps); /** @@ -186,6 +185,12 @@ public void editOrderXMLAddPerDomainCrawlerTraps(DomainConfiguration cfg) { public abstract void writeToFile(File orderXmlFile); public abstract void setRecoverlogNode(File recoverlogGzFile); + /** + * Construct a H1HeritrixTemplate or H3HeritrixTemplate based on the signature of the given string. 
+ * @param template_id The id of the template + * @param templateAsString The template as a String object + * @return a HeritrixTemplate based on the signature of the given string. + */ public static HeritrixTemplate getTemplateFromString(long template_id, String templateAsString){ if (templateAsString.contains(H1_SIGNATURE)) { try { @@ -215,8 +220,10 @@ public static HeritrixTemplate read(File orderXmlFile){ } /** - * Read the template using the given Reader - * @param reader A given Reader + * Read the template using the given Reader. + * + * @param template_id The id of the template + * @param orderTemplateReader A given Reader to read a template * @return a HeritrixTemplate object */ public static HeritrixTemplate read(long template_id, Reader orderTemplateReader) { diff --git a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/SeedList.java b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/SeedList.java index 2c25df604f..68944bed25 100644 --- a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/SeedList.java +++ b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/datamodel/SeedList.java @@ -92,7 +92,7 @@ public SeedList(String name, String seedsAsString) { * * @param url The url to check * @return true, if it is accepted - * @see {@link HarvesterSettings#VALID_SEED_REGEX}. 
+ * @see HarvesterSettings#VALID_SEED_REGEX */ private boolean isAcceptableURL(String url) { Pattern validSeedPattern = Pattern.compile(Settings.get(HarvesterSettings.VALID_SEED_REGEX)); diff --git a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/report/AbstractHarvestReport.java b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/report/AbstractHarvestReport.java index 829cee59ac..232b94d87a 100644 --- a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/report/AbstractHarvestReport.java +++ b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/report/AbstractHarvestReport.java @@ -60,7 +60,7 @@ public AbstractHarvestReport() { /** * Constructor from DomainStatsReports. * - * @param files the result of parsing the crawl.log for domain statistics + * @param dsr the result of parsing the crawl.log for domain statistics */ public AbstractHarvestReport(DomainStatsReport dsr) { ArgumentNotValid.checkNotNull(dsr, "DomainStatsReport dsr"); diff --git a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/report/BnfHarvestReport.java b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/report/BnfHarvestReport.java index 0fda1cd8d0..2c2d83336a 100644 --- a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/report/BnfHarvestReport.java +++ b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/report/BnfHarvestReport.java @@ -61,7 +61,7 @@ public class BnfHarvestReport extends AbstractHarvestReport{ /** * Constructor for this class. * - * @param files A HeritrixFiles object. 
+ * @param dsr A DomainStatsReport * @throws IOFailure If the processing of the files goes wrong */ public BnfHarvestReport(DomainStatsReport dsr) throws IOFailure { diff --git a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/report/LegacyHarvestReport.java b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/report/LegacyHarvestReport.java index 7beed8ed4a..c39125ecff 100644 --- a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/report/LegacyHarvestReport.java +++ b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/harvesting/report/LegacyHarvestReport.java @@ -40,7 +40,7 @@ import dk.netarkivet.harvester.datamodel.StopReason; /** - * Class responsible for generating a domain harvest report from crawl logs created by Heritrix and presenting the + * Class responsible for representing a domain harvest report from crawl logs created by Heritrix and presenting the * relevant information to clients. */ @SuppressWarnings({"serial"}) @@ -50,16 +50,7 @@ public class LegacyHarvestReport extends AbstractHarvestReport { private static final Logger log = LoggerFactory.getLogger(LegacyHarvestReport.class); /** - * The constructor gets the data in a crawl.log file, and parses the file. The crawl.log is described in the - * Heritrix user-manual, section 8.2.1: http://crawler.archive.org/articles/user_manual/analysis.html#logs Note: - * Invalid lines are logged and then ignored. - *

- Each url listed in the file is assigned to a domain, the total object count and byte count per domain is - calculated. Finally, a StopReason is found for each domain: When the response is CrawlURI.S_BLOCKED_BY_QUOTA ( - currently = -5003), the StopReason is set to StopReason.SIZE_LIMIT, if the annotation equals "Q:group-max-all-kb" - or StopReason.OBJECT_LIMIT, if the annotation equals "Q:group-max-fetch-successes". - - * @param hFiles the Heritrix reports and logs. + * @param dsr A DomainStatsReport for a harvest */ public LegacyHarvestReport(DomainStatsReport dsr) { super(dsr); diff --git a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/webinterface/TrapAction.java b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/webinterface/TrapAction.java index 1f7d7ca4ff..0003dcbcb8 100644 --- a/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/webinterface/TrapAction.java +++ b/harvester/harvester-core/src/main/java/dk/netarkivet/harvester/webinterface/TrapAction.java @@ -83,7 +83,6 @@ public static void processRequest(PageContext context, I18n i18n) throws Forward * * @param context the context of the servlet request triggering this action. * @param i18n the internationalisation to use for presenting the results. 
- * @return true, if we should continue our rendering of the page, false otherwise */ protected abstract void doAction(PageContext context, I18n i18n); diff --git a/harvester/heritrix1/heritrix1-controller/src/main/java/dk/netarkivet/harvester/harvesting/report/HarvestReportGenerator.java b/harvester/heritrix1/heritrix1-controller/src/main/java/dk/netarkivet/harvester/harvesting/report/HarvestReportGenerator.java index 8e2e4e723d..3aa1e3fadf 100644 --- a/harvester/heritrix1/heritrix1-controller/src/main/java/dk/netarkivet/harvester/harvesting/report/HarvestReportGenerator.java +++ b/harvester/heritrix1/heritrix1-controller/src/main/java/dk/netarkivet/harvester/harvesting/report/HarvestReportGenerator.java @@ -51,6 +51,14 @@ /** * Base implementation for a harvest report. + * The constructor gets the data in a crawl.log file, and parses the file. The crawl.log is described in the + * Heritrix user-manual, section 8.2.1: http://crawler.archive.org/articles/user_manual/analysis.html#logs Note: + * Invalid lines are logged and then ignored. + *

+ * Each url listed in the file is assigned to a domain, the total object count and byte count per domain is + * calculated. Finally, a StopReason is found for each domain: When the response is CrawlURI.S_BLOCKED_BY_QUOTA ( + * currently = -5003), the StopReason is set to StopReason.SIZE_LIMIT, if the annotation equals "Q:group-max-all-kb" + * or StopReason.OBJECT_LIMIT, if the annotation equals "Q:group-max-fetch-successes". */ @SuppressWarnings({"serial"}) public class HarvestReportGenerator { @@ -380,8 +388,7 @@ private String getDomainNameFromURIString(String uriAsString) throws URISyntaxEx } /** - * - * @return + * @return default stopReason */ public StopReason getDefaultStopReason() { return defaultStopReason; diff --git a/harvester/heritrix1/heritrix1-extensions/src/main/java/dk/netarkivet/harvester/harvesting/WARCWriterProcessor.java b/harvester/heritrix1/heritrix1-extensions/src/main/java/dk/netarkivet/harvester/harvesting/WARCWriterProcessor.java index 784fe820f5..1471151200 100644 --- a/harvester/heritrix1/heritrix1-extensions/src/main/java/dk/netarkivet/harvester/harvesting/WARCWriterProcessor.java +++ b/harvester/heritrix1/heritrix1-extensions/src/main/java/dk/netarkivet/harvester/harvesting/WARCWriterProcessor.java @@ -36,9 +36,6 @@ import java.util.logging.Level; import java.util.logging.Logger; -import javax.management.AttributeNotFoundException; -import javax.management.MBeanException; -import javax.management.ReflectionException; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpression; @@ -196,9 +193,7 @@ public WARCWriterProcessor(final String name) { e.setExpertSetting(true); // Add map setting to add NAS metadata to WarcInfo records. 
- e = addElementToDefinition(new MapType(ATTR_METADATA_ITEMS, "Metadata items.", String.class)); - //e = addElementToDefinition(new StringList(ATTR_METADATA_ITEMS, "Metadata items.")); e.setOverrideable(true); e.setExpertSetting(true); } @@ -207,45 +202,7 @@ protected void setupPool(final AtomicInteger serialNo) { setPool(new WARCWriterPool(serialNo, this, getPoolMaximumActive(), getPoolMaximumWait())); } - /** - * @return Metadata inputs as convenient map. Returns null if no metadata items. - * @throws AttributeNotFoundException - * @throws ReflectionException - * @throws MBeanException - */ - /* - public Map getMetadataItems() throws AttributeNotFoundException, MBeanException, ReflectionException { - Map result = null; - MapType items = (MapType)getAttribute(ATTR_METADATA_ITEMS); - if (items != null) { - for (Iterator i = items.iterator(null); i.hasNext();) { - Attribute a = (Attribute)i.next(); - if (result == null) { - result = new HashMap(); - } - result.put(a.getName(), a.getValue()); - } - } - return result; - } - */ - - @SuppressWarnings("unchecked") - /* - public List getMetadataItems() { - ArrayList results = new ArrayList(); - Object obj = getAttributeUnchecked(ATTR_METADATA_ITEMS); - if (obj != null) { - List list = (StringList)obj; - for (Iterator i = list.iterator(); i.hasNext();) { - String str = (String)i.next(); - results.add(str); - } - } - return results; - } - */ - + /** * Writes a CrawlURI and its associated data to store file. *

@@ -711,15 +668,7 @@ protected String getFirstrecordBody(File orderFile) { } catch (XPathExpressionException e) { logger.log(Level.WARNING, "Error obtaining metadata items", e); } - /* catch (AttributeNotFoundException e) { - logger.log(Level.WARNING, "Error obtaining warcinfo", e); - } catch (MBeanException e) { - logger.log(Level.WARNING, "Error obtaining warcinfo", e); - } catch (ReflectionException e) { - logger.log(Level.WARNING, "Error obtaining warcinfo", e); - } - */ - + // add fields from harvesInfo.xml version 0.4 /* * 0.4 1 HIGHPRIORITY diff --git a/harvester/heritrix1/heritrix1-extensions/src/main/java/is/hi/bok/deduplicator/DeDuplicator.java b/harvester/heritrix1/heritrix1-extensions/src/main/java/is/hi/bok/deduplicator/DeDuplicator.java index 28c669b8e7..351264c48b 100644 --- a/harvester/heritrix1/heritrix1-extensions/src/main/java/is/hi/bok/deduplicator/DeDuplicator.java +++ b/harvester/heritrix1/heritrix1-extensions/src/main/java/is/hi/bok/deduplicator/DeDuplicator.java @@ -871,7 +871,7 @@ protected void doTimestampAnalysis(CrawlURI curi, Document urlHit, Statistics cu * * @param fieldName name of the field to look in. * @param value The value to query for - * @returns A Query for the given value in the given field. + * @return A Query for the given value in the given field. 
*/ protected Query queryField(String fieldName, String value) { Query query = null; diff --git a/harvester/heritrix3/heritrix3-extensions/src/main/java/dk/netarkivet/harvester/harvesting/ContentSizeAnnotationPostProcessor.java b/harvester/heritrix3/heritrix3-extensions/src/main/java/dk/netarkivet/harvester/harvesting/ContentSizeAnnotationPostProcessor.java index be8674941b..ab833af3ab 100644 --- a/harvester/heritrix3/heritrix3-extensions/src/main/java/dk/netarkivet/harvester/harvesting/ContentSizeAnnotationPostProcessor.java +++ b/harvester/heritrix3/heritrix3-extensions/src/main/java/dk/netarkivet/harvester/harvesting/ContentSizeAnnotationPostProcessor.java @@ -55,7 +55,7 @@ public ContentSizeAnnotationPostProcessor() { * @param crawlURI URI to add annotation for if successful. * @throws ArgumentNotValid if crawlURI is null. * @throws InterruptedException never. - * @see Processor#innerProcess(org.archive.crawler.datamodel.CrawlURI) + * @see Processor */ protected void innerProcess(CrawlURI crawlURI) throws InterruptedException { ArgumentNotValid.checkNotNull(crawlURI, "CrawlURI crawlURI"); diff --git a/harvester/heritrix3/heritrix3-extensions/src/main/java/dk/netarkivet/harvester/harvesting/DomainnameQueueAssignmentPolicy.java b/harvester/heritrix3/heritrix3-extensions/src/main/java/dk/netarkivet/harvester/harvesting/DomainnameQueueAssignmentPolicy.java index 8de1f172c7..f47e9b446e 100644 --- a/harvester/heritrix3/heritrix3-extensions/src/main/java/dk/netarkivet/harvester/harvesting/DomainnameQueueAssignmentPolicy.java +++ b/harvester/heritrix3/heritrix3-extensions/src/main/java/dk/netarkivet/harvester/harvesting/DomainnameQueueAssignmentPolicy.java @@ -54,15 +54,11 @@ public class DomainnameQueueAssignmentPolicy /** Return a key for queue names based on domain names (last two parts of * host name) or IP address. They key may include a # at the end. * - * @param controller The controller the crawl is running on. - * @param cauri A potential URI. 
+ * @param basis A potential URI. * @return a class key (really an arbitrary string), one of , * #, or "default...". - * @see HostnameQueueAssignmentPolicy#getClassKey( - * org.archive.crawler.framework.CrawlController, - * org.archive.crawler.datamodel.CandidateURI) + * @see HostnameQueueAssignmentPolicy#getClassKey(org.archive.modules.CrawlURI) */ - @Override protected String getCoreKey(UURI basis) { String candidate; diff --git a/harvester/heritrix3/heritrix3-extensions/src/main/java/dk/netarkivet/harvester/harvesting/SeedUriDomainnameQueueAssignmentPolicy.java b/harvester/heritrix3/heritrix3-extensions/src/main/java/dk/netarkivet/harvester/harvesting/SeedUriDomainnameQueueAssignmentPolicy.java index debb4f7022..c81a1d5ce4 100644 --- a/harvester/heritrix3/heritrix3-extensions/src/main/java/dk/netarkivet/harvester/harvesting/SeedUriDomainnameQueueAssignmentPolicy.java +++ b/harvester/heritrix3/heritrix3-extensions/src/main/java/dk/netarkivet/harvester/harvesting/SeedUriDomainnameQueueAssignmentPolicy.java @@ -64,13 +64,10 @@ public class SeedUriDomainnameQueueAssignmentPolicy /** Return a key for queue names based on domain names (last two parts of * host name) or IP address. They key may include a # at the end. * - * @param controller The controller the crawl is running on. * @param cauri A potential URI. * @return a class key (really an arbitrary string), one of , * #, or "default...". 
- * @see HostnameQueueAssignmentPolicy#getClassKey( - * org.archive.crawler.framework.CrawlController, - * org.archive.crawler.datamodel.CandidateURI) + * @see HostnameQueueAssignmentPolicy#getClassKey(CrawlURI) */ public String getClassKey(CrawlURI cauri) { String candidate; diff --git a/harvester/heritrix3/heritrix3-extensions/src/main/java/is/hi/bok/deduplicator/DeDuplicator.java b/harvester/heritrix3/heritrix3-extensions/src/main/java/is/hi/bok/deduplicator/DeDuplicator.java index c9d3f34b29..f6520d544f 100644 --- a/harvester/heritrix3/heritrix3-extensions/src/main/java/is/hi/bok/deduplicator/DeDuplicator.java +++ b/harvester/heritrix3/heritrix3-extensions/src/main/java/is/hi/bok/deduplicator/DeDuplicator.java @@ -926,7 +926,7 @@ protected void doTimestampAnalysis(CrawlURI curi, Document urlHit, * * @param fieldName name of the field to look in. * @param value The value to query for - * @returns A Query for the given value in the given field. + * @return A Query for the given value in the given field. */ protected Query queryField(String fieldName, String value) { Query query = null; diff --git a/harvester/history-gui/src/main/webapp/Harveststatus-jobdetails.jsp b/harvester/history-gui/src/main/webapp/Harveststatus-jobdetails.jsp index d029489e37..8cfd9527fb 100644 --- a/harvester/history-gui/src/main/webapp/Harveststatus-jobdetails.jsp +++ b/harvester/history-gui/src/main/webapp/Harveststatus-jobdetails.jsp @@ -316,7 +316,7 @@ value="<%= jobID %>"/> %> - (text/plain)) + (text/plain) <%