
Commit

Javadoc fixes, and one typo
svcarlsen committed Jan 15, 2016
1 parent 0cecf28 commit 65ee048
Showing 17 changed files with 44 additions and 99 deletions.
@@ -218,9 +218,10 @@ public long writeToIndex(CrawlDataIterator dataIt, String mimefilter, boolean bl
}

/**
* @param item
* Create Lucene Document for given CrawlDataItem.
* @param item A CrawlDataItem
* @param defaultOrigin
* @return
* @return Lucene Document for the given CrawlDataItem
*/
private Document createDocument(CrawlDataItem item, String defaultOrigin) {
Document doc = new Document();
@@ -46,7 +46,8 @@
import dk.netarkivet.common.utils.Settings;

/**
* Utilities to allow testing databases. //FIXME: Rename without Test as these are not specifically test related.
* Utilities to allow testing databases.
* FIXME: Rename without Test as these are not specifically test related.
*/
public class DatabaseTestUtils {

@@ -60,7 +61,7 @@ public class DatabaseTestUtils {
*
* @param resourcePath A file that contains a test database.
* @param dbCreationDir
* @return a connection to the database stored in the given file
*
*/
public static void createDatabase(String resourcePath, String dbname, File dbCreationDir) throws Exception {
Settings.set(CommonSettings.DB_MACHINE, "");
@@ -131,7 +132,6 @@ private static void applyStatementsInInputStream(Connection connection, InputStr
*
* @param resourcePath A file that contains a test database.
* @param dbCreationDir
* @return a connection to the database stored in the given file
*/
public static void createDatabase(String resourcePath, File dbCreationDir) throws Exception {
createDatabase(resourcePath, "derivenamefromresource", dbCreationDir);
@@ -143,7 +143,6 @@ public static void createDatabase(String resourcePath, File dbCreationDir) throw
*
* @param resourcePath Location of the sql files to create and populate the test DB.
* @param dbCreationDir
* @return a connection to the given sample harvest definition database
*/
public static void createHDDB(String resourcePath, String dbname, File dbCreationDir) throws Exception {
createDatabase(resourcePath, dbname, dbCreationDir);
@@ -102,10 +102,9 @@ public class H3HeritrixTemplate extends HeritrixTemplate implements Serializable
/**
* Constructor for HeritrixTemplate class.
*
* @param doc the order.xml
* @param verify If true, verifies if the given dom4j Document contains the elements required by our software.
* @throws ArgumentNotValid if doc is null, or verify is true and doc does not obey the constraints required by our
* software.
* @param template_id The persistent id of the template in the database
* @param template The template as String object
* @throws ArgumentNotValid if template is null.
*/
public H3HeritrixTemplate(long template_id, String template) {
ArgumentNotValid.checkNotNull(template, "String template");
@@ -264,7 +263,7 @@ public void configureQuotaEnforcer(boolean maxObjectsIsSetByQuotaEnforcer,
* Make sure that Heritrix will archive its data in the chosen archiveFormat.
*
* @param archiveFormat the chosen archiveformat ('arc' or 'warc' supported)
* @throw ArgumentNotValid If the chosen archiveFormat is not supported.
* @throws ArgumentNotValid If the chosen archiveFormat is not supported.
*/
@Override
public void setArchiveFormat(String archiveFormat) {
@@ -124,9 +124,8 @@ public void setIsActive(boolean isActive) {
* global traps.
*
* @param elementName The name of the added element.
* @param crawlerTraps A list of crawler trap regular expressions to add to this job.
* @param crawlertraps A list of crawler trap regular expressions to add to this job.
*/

public abstract void insertCrawlerTraps(String elementName, List<String> crawlertraps);

/**
@@ -186,6 +185,12 @@ public void editOrderXMLAddPerDomainCrawlerTraps(DomainConfiguration cfg) {
public abstract void writeToFile(File orderXmlFile);
public abstract void setRecoverlogNode(File recoverlogGzFile);

/**
* Construct a H1HeritrixTemplate or H3HeritrixTemplate based on the signature of the given string.
* @param template_id The id of the template
* @param templateAsString The template as a String object
* @return a HeritrixTemplate based on the signature of the given string.
*/
public static HeritrixTemplate getTemplateFromString(long template_id, String templateAsString){
if (templateAsString.contains(H1_SIGNATURE)) {
try {
@@ -215,8 +220,10 @@ public static HeritrixTemplate read(File orderXmlFile){
}

/**
* Read the template using the given Reader
* @param reader A given Reader
* Read the template using the given Reader.
*
* @param template_id The id of the template
* @param orderTemplateReader A given Reader to read a template
* @return a HeritrixTemplate object
*/
public static HeritrixTemplate read(long template_id, Reader orderTemplateReader) {
@@ -92,7 +92,7 @@ public SeedList(String name, String seedsAsString) {
*
* @param url The url to check
* @return true, if it is accepted
* @see {@link HarvesterSettings#VALID_SEED_REGEX}.
* @see HarvesterSettings#VALID_SEED_REGEX
*/
private boolean isAcceptableURL(String url) {
Pattern validSeedPattern = Pattern.compile(Settings.get(HarvesterSettings.VALID_SEED_REGEX));
@@ -60,7 +60,7 @@ public AbstractHarvestReport() {
/**
* Constructor from DomainStatsReports.
*
* @param files the result of parsing the crawl.log for domain statistics
* @param dsr the result of parsing the crawl.log for domain statistics
*/
public AbstractHarvestReport(DomainStatsReport dsr) {
ArgumentNotValid.checkNotNull(dsr, "DomainStatsReport dsr");
@@ -61,7 +61,7 @@ public class BnfHarvestReport extends AbstractHarvestReport{
/**
* Constructor for this class.
*
* @param files A HeritrixFiles object.
* @param dsr A DomainStatsReport
* @throws IOFailure If the processing of the files goes wrong
*/
public BnfHarvestReport(DomainStatsReport dsr) throws IOFailure {
@@ -40,7 +40,7 @@
import dk.netarkivet.harvester.datamodel.StopReason;

/**
* Class responsible for generating a domain harvest report from crawl logs created by Heritrix and presenting the
* Class responsible for representing a domain harvest report from crawl logs created by Heritrix and presenting the
* relevant information to clients.
*/
@SuppressWarnings({"serial"})
@@ -50,16 +50,7 @@ public class LegacyHarvestReport extends AbstractHarvestReport {
private static final Logger log = LoggerFactory.getLogger(LegacyHarvestReport.class);

/**
* The constructor gets the data in a crawl.log file, and parses the file. The crawl.log is described in the
* Heritrix user-manual, section 8.2.1: http://crawler.archive.org/articles/user_manual/analysis.html#logs Note:
* Invalid lines are logged and then ignored.
* <p>
* Each url listed in the file is assigned to a domain, the total object count and byte count per domain is
* calculated. Finally, a StopReason is found for each domain: When the response is CrawlURI.S_BLOCKED_BY_QUOTA (
* currently = -5003), the StopReason is set to StopReason.SIZE_LIMIT, if the annotation equals "Q:group-max-all-kb"
* or StopReason.OBJECT_LIMIT, if the annotation equals "Q:group-max-fetch-successes".
*
* @param hFiles the Heritrix reports and logs.
* @dsr a DomainStatsReport for a harvest
*/
public LegacyHarvestReport(DomainStatsReport dsr) {
super(dsr);
@@ -83,7 +83,6 @@ public static void processRequest(PageContext context, I18n i18n) throws Forward
*
* @param context the context of the servlet request triggering this action.
* @param i18n the internationalisation to use for presenting the results.
* @return true, if we should continue our rendering of the page, false otherwise
*/
protected abstract void doAction(PageContext context, I18n i18n);

@@ -51,6 +51,14 @@

/**
* Base implementation for a harvest report.
* The constructor gets the data in a crawl.log file, and parses the file. The crawl.log is described in the
* Heritrix user-manual, section 8.2.1: http://crawler.archive.org/articles/user_manual/analysis.html#logs Note:
* Invalid lines are logged and then ignored.
* <p>
* Each url listed in the file is assigned to a domain, the total object count and byte count per domain is
* calculated. Finally, a StopReason is found for each domain: When the response is CrawlURI.S_BLOCKED_BY_QUOTA (
* currently = -5003), the StopReason is set to StopReason.SIZE_LIMIT, if the annotation equals "Q:group-max-all-kb"
* or StopReason.OBJECT_LIMIT, if the annotation equals "Q:group-max-fetch-successes".
*/
@SuppressWarnings({"serial"})
public class HarvestReportGenerator {
@@ -380,8 +388,7 @@ private String getDomainNameFromURIString(String uriAsString) throws URISyntaxEx
}

/**
*
* @return
* @return default stopReason
*/
public StopReason getDefaultStopReason() {
return defaultStopReason;
@@ -36,9 +36,6 @@
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.AttributeNotFoundException;
import javax.management.MBeanException;
import javax.management.ReflectionException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
@@ -196,9 +193,7 @@ public WARCWriterProcessor(final String name) {
e.setExpertSetting(true);

// Add map setting to add NAS metadata to WarcInfo records.

e = addElementToDefinition(new MapType(ATTR_METADATA_ITEMS, "Metadata items.", String.class));
//e = addElementToDefinition(new StringList(ATTR_METADATA_ITEMS, "Metadata items."));
e.setOverrideable(true);
e.setExpertSetting(true);
}
@@ -207,45 +202,7 @@ protected void setupPool(final AtomicInteger serialNo) {
setPool(new WARCWriterPool(serialNo, this, getPoolMaximumActive(), getPoolMaximumWait()));
}

/**
* @return Metadata inputs as convenient map. Returns null if no metadata items.
* @throws AttributeNotFoundException
* @throws ReflectionException
* @throws MBeanException
*/
/*
public Map<String,Object> getMetadataItems() throws AttributeNotFoundException, MBeanException, ReflectionException {
Map<String,Object> result = null;
MapType items = (MapType)getAttribute(ATTR_METADATA_ITEMS);
if (items != null) {
for (Iterator i = items.iterator(null); i.hasNext();) {
Attribute a = (Attribute)i.next();
if (result == null) {
result = new HashMap<String,Object>();
}
result.put(a.getName(), a.getValue());
}
}
return result;
}
*/

@SuppressWarnings("unchecked")
/*
public List<String> getMetadataItems() {
ArrayList<String> results = new ArrayList<String>();
Object obj = getAttributeUnchecked(ATTR_METADATA_ITEMS);
if (obj != null) {
List list = (StringList)obj;
for (Iterator i = list.iterator(); i.hasNext();) {
String str = (String)i.next();
results.add(str);
}
}
return results;
}
*/


/**
* Writes a CrawlURI and its associated data to store file.
* <p>
@@ -711,15 +668,7 @@ protected String getFirstrecordBody(File orderFile) {
} catch (XPathExpressionException e) {
logger.log(Level.WARNING, "Error obtaining metadata items", e);
}
/* catch (AttributeNotFoundException e) {
logger.log(Level.WARNING, "Error obtaining warcinfo", e);
} catch (MBeanException e) {
logger.log(Level.WARNING, "Error obtaining warcinfo", e);
} catch (ReflectionException e) {
logger.log(Level.WARNING, "Error obtaining warcinfo", e);
}
*/


// add fields from harvesInfo.xml version 0.4
/*
* <harvestInfo> <version>0.4</version> <jobId>1</jobId> <priority>HIGHPRIORITY</priority>
@@ -871,7 +871,7 @@ protected void doTimestampAnalysis(CrawlURI curi, Document urlHit, Statistics cu
*
* @param fieldName name of the field to look in.
* @param value The value to query for
* @returns A Query for the given value in the given field.
* @return A Query for the given value in the given field.
*/
protected Query queryField(String fieldName, String value) {
Query query = null;
@@ -55,7 +55,7 @@ public ContentSizeAnnotationPostProcessor() {
* @param crawlURI URI to add annotation for if successful.
* @throws ArgumentNotValid if crawlURI is null.
* @throws InterruptedException never.
* @see Processor#innerProcess(org.archive.crawler.datamodel.CrawlURI)
* @see Processor
*/
protected void innerProcess(CrawlURI crawlURI) throws InterruptedException {
ArgumentNotValid.checkNotNull(crawlURI, "CrawlURI crawlURI");
@@ -54,15 +54,11 @@ public class DomainnameQueueAssignmentPolicy
/** Return a key for queue names based on domain names (last two parts of
* host name) or IP address. They key may include a #<portnr> at the end.
*
* @param controller The controller the crawl is running on.
* @param cauri A potential URI.
* @param basis A potential URI.
* @return a class key (really an arbitrary string), one of <domainOrIP>,
* <domainOrIP>#<port>, or "default...".
* @see HostnameQueueAssignmentPolicy#getClassKey(
* org.archive.crawler.framework.CrawlController,
* org.archive.crawler.datamodel.CandidateURI)
* @see HostnameQueueAssignmentPolicy#getClassKey(org.archive.modules.CrawlURI)
*/

@Override
protected String getCoreKey(UURI basis) {
String candidate;
@@ -64,13 +64,10 @@ public class SeedUriDomainnameQueueAssignmentPolicy
/** Return a key for queue names based on domain names (last two parts of
* host name) or IP address. They key may include a #<portnr> at the end.
*
* @param controller The controller the crawl is running on.
* @param cauri A potential URI.
* @return a class key (really an arbitrary string), one of <domainOrIP>,
* <domainOrIP>#<port>, or "default...".
* @see HostnameQueueAssignmentPolicy#getClassKey(
* org.archive.crawler.framework.CrawlController,
* org.archive.crawler.datamodel.CandidateURI)
* @see HostnameQueueAssignmentPolicy#getClassKey(CrawlURI)
*/
public String getClassKey(CrawlURI cauri) {
String candidate;
@@ -926,7 +926,7 @@ protected void doTimestampAnalysis(CrawlURI curi, Document urlHit,
*
* @param fieldName name of the field to look in.
* @param value The value to query for
* @returns A Query for the given value in the given field.
* @return A Query for the given value in the given field.
*/
protected Query queryField(String fieldName, String value) {
Query query = null;
@@ -316,7 +316,7 @@ value="<%= jobID %>"/>
%>
<a href="<%=link %>"><fmt:message key="show.job.0.harvesttemplate">
<fmt:param value="<%=job.getJobID()%>"/>
</fmt:message></a>&nbsp;(<a href="<%=linkWithrequestedType %>">text/plain)</a>)
</fmt:message></a>&nbsp;(<a href="<%=linkWithrequestedType %>">text/plain</a>)


<%
