diff --git a/checkstyle.xml b/checkstyle.xml
index 17a5439..0017adf 100644
--- a/checkstyle.xml
+++ b/checkstyle.xml
@@ -82,16 +82,6 @@
-
-
-
-
-
-
-
-
-
-
@@ -167,7 +157,6 @@
-
@@ -267,9 +256,6 @@
-
-
-
diff --git a/src/main/java/com/amihaiemil/charles/AbstractWebCrawl.java b/src/main/java/com/amihaiemil/charles/AbstractWebCrawl.java
index 27640c1..f1aee28 100644
--- a/src/main/java/com/amihaiemil/charles/AbstractWebCrawl.java
+++ b/src/main/java/com/amihaiemil/charles/AbstractWebCrawl.java
@@ -23,12 +23,14 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */package com.amihaiemil.charles;
+ */
+package com.amihaiemil.charles;
import org.openqa.selenium.WebDriver;
/**
- * An abstract webcrawl - contains the webdriver and other common data of each crawl.
+ * An abstract webcrawl - contains the webdriver and other common data of each
+ * crawl.
* @author Mihai Andronache (amihaiemil@gmail.com)
* @version $Id$
* @since 1.0.0
@@ -39,23 +41,23 @@ public abstract class AbstractWebCrawl implements WebCrawl {
/**
* WebDriver.
*/
- protected WebDriver driver;
+ private final WebDriver driver;
/**
* Ignored pages patterns.
*/
- protected IgnoredPatterns ignoredLinks;
+ private final IgnoredPatterns ignoredLinks;
/**
* Repo to export the pages to.
*/
- protected Repository repo;
+ private final Repository repo;
/**
* Pages are crawled and exported in batches in order to avoid flooding
* the memory if there are many pages on a website. Default value is 100.
*/
- protected int batchSize;
+ private final int batchSize;
/**
* Ctor.
@@ -63,13 +65,50 @@ public abstract class AbstractWebCrawl implements WebCrawl {
* @param igp Ignored patterns.
* @param repo Repository to export the crawled pages into.
* @param batch Size of a crawl batch.
+ * @checkstyle ParameterNumber (6 lines)
*/
- public AbstractWebCrawl(WebDriver webd, IgnoredPatterns igp, Repository repo, int batch) {
+ public AbstractWebCrawl(
+ final WebDriver webd, final IgnoredPatterns igp,
+ final Repository repo, final int batch
+ ) {
this.driver = webd;
this.ignoredLinks = igp;
this.repo = repo;
this.batchSize = batch;
}
+ @Override
public abstract void crawl() throws DataExportException;
+
+ /**
+ * Fetch the used WebDriver.
+ * @return driver Webdriver of this crawl
+ */
+ public final WebDriver driver() {
+ return this.driver;
+ }
+
+ /**
+ * Fetch the used Repository.
+ * @return repo Repository where the pages are sent
+ */
+ public final Repository repo() {
+ return this.repo;
+ }
+
+ /**
+ * Fetch the ignored links patterns.
+ * @return ignoredLinks IgnoredPatterns of this crawl
+ */
+ public final IgnoredPatterns ignoredPatterns() {
+ return this.ignoredLinks;
+ }
+
+ /**
+ * Batch size. How many pages will be crawled at once?
+ * @return Integer batch size.
+ */
+ public final int batchSize() {
+ return this.batchSize;
+ }
}
diff --git a/src/main/java/com/amihaiemil/charles/DataExportException.java b/src/main/java/com/amihaiemil/charles/DataExportException.java
index 7ac3661..3156fdf 100644
--- a/src/main/java/com/amihaiemil/charles/DataExportException.java
+++ b/src/main/java/com/amihaiemil/charles/DataExportException.java
@@ -23,7 +23,8 @@
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */package com.amihaiemil.charles;
+ */
+package com.amihaiemil.charles;
/**
* Exception thrown if a page cannot be exported properly.
diff --git a/src/main/java/com/amihaiemil/charles/GraphCrawl.java b/src/main/java/com/amihaiemil/charles/GraphCrawl.java
index e24670c..1ec41ba 100644
--- a/src/main/java/com/amihaiemil/charles/GraphCrawl.java
+++ b/src/main/java/com/amihaiemil/charles/GraphCrawl.java
@@ -98,9 +98,9 @@ public GraphCrawl(
@Override
public void crawl() throws DataExportException {
- if(!this.ignoredLinks.contains(this.index.getHref())) {
+ if(!this.ignoredPatterns().contains(this.index.getHref())) {
List pages = new ArrayList();
- WebPage indexSnapshot = new LiveWebPage(this.driver, this.index).snapshot();
+ WebPage indexSnapshot = new LiveWebPage(this.driver(), this.index).snapshot();
pages.add(indexSnapshot);
Set crawledLinks = new HashSet();
@@ -114,13 +114,13 @@ public void crawl() throws DataExportException {
if(toCrawl.size() > 0) {
Link link = toCrawl.remove(0);
while(toCrawl.size() > 0) {
- if(this.ignoredLinks.contains(link.getHref())) {
+ if(this.ignoredPatterns().contains(link.getHref())) {
link = toCrawl.remove(0);
continue;
}
boolean notCrawledAlready = crawledLinks.add(link);
if(notCrawledAlready) {
- WebPage snapshotCrawled = new LiveWebPage(this.driver, link).snapshot();
+ WebPage snapshotCrawled = new LiveWebPage(this.driver(), link).snapshot();
pages.add(snapshotCrawled);
this.checkBatchSize(pages);
toCrawl.addAll(snapshotCrawled.getLinks());
@@ -128,8 +128,8 @@ public void crawl() throws DataExportException {
link = toCrawl.remove(0);
}
}
- this.repo.export(pages);
- this.driver.quit();
+ this.repo().export(pages);
+ this.driver().quit();
}
}
@@ -140,8 +140,8 @@ public void crawl() throws DataExportException {
* @throws DataExportException If something goes wrong during processing of crawled pages.
*/
private void checkBatchSize(List pages) throws DataExportException {
- if(pages.size() == this.batchSize) {
- this.repo.export(pages);
+ if(pages.size() == this.batchSize()) {
+ this.repo().export(pages);
pages.clear();
}
}
diff --git a/src/main/java/com/amihaiemil/charles/InMemoryRepository.java b/src/main/java/com/amihaiemil/charles/InMemoryRepository.java
index d639cdf..eb614e9 100644
--- a/src/main/java/com/amihaiemil/charles/InMemoryRepository.java
+++ b/src/main/java/com/amihaiemil/charles/InMemoryRepository.java
@@ -39,25 +39,25 @@
* @author Mihai Andronache (amihaiemil@gmail.com)
*
*/
-public class InMemoryRepository implements Repository {
+public final class InMemoryRepository implements Repository {
/**
* Holds all the crawled pages.
*/
- private List pages = new ArrayList();
+ private final List pgs = new ArrayList();
/**
* Get all the pages from this Repository.
* @return List of pages.
*/
public List getCrawledPages() {
- return this.pages;
+ return this.pgs;
}
@Override
- public void export(List pages) throws DataExportException {
- for(WebPage page : pages) {
- this.pages.add(page);
+ public void export(final List pages) throws DataExportException {
+ for(final WebPage page : pages) {
+ this.pgs.add(page);
}
}
}
diff --git a/src/main/java/com/amihaiemil/charles/Link.java b/src/main/java/com/amihaiemil/charles/Link.java
index 0913b23..04ecec1 100644
--- a/src/main/java/com/amihaiemil/charles/Link.java
+++ b/src/main/java/com/amihaiemil/charles/Link.java
@@ -46,7 +46,7 @@ public Link() {
this("", "");
}
- public Link(String text, String href) {
+ public Link(final String text, final String href) {
this.text = text;
this.href = href;
}
@@ -55,7 +55,7 @@ public String getText() {
return text;
}
- public void setText(String text) {
+ public void setText(final String text) {
this.text = text;
}
@@ -63,7 +63,7 @@ public String getHref() {
return href;
}
- public void setHref(String href) {
+ public void setHref(final String href) {
this.href = href;
}
@@ -75,8 +75,9 @@ public int hashCode() {
result = prime * result + 0;
} else {
if (this.href.contains("#")) {
- result = new Link("", href.substring(0, href.indexOf("#")))
- .hashCode();
+ result = new Link(
+ "", this.href.substring(0, this.href.indexOf("#"))
+ ).hashCode();
} else {
if (this.href.endsWith("/")) {
result = prime
@@ -103,7 +104,7 @@ public boolean equals(Object obj) {
return false;
}
Link other = (Link) obj;
- if (href == null) {
+ if (this.href == null) {
if (other.href != null)
return false;
} else {
@@ -121,7 +122,7 @@ public boolean equals(Object obj) {
}
if (this.href.endsWith("/") && other.href.endsWith("/")) {
- return this.href.substring(0, href.length() - 1).equals(
+ return this.href.substring(0, this.href.length() - 1).equals(
other.href.substring(0, other.href.length() - 1));
} else if (this.href.endsWith("/")) {
return this.href.substring(0, href.length() - 1).equals(
@@ -144,7 +145,7 @@ public String toString() {
*
* @return ture if valid, false otherwise.
*/
- public boolean valid(String parentLoc) {
+ public boolean valid(final String parentLoc) {
if (this.href != null && !this.href.startsWith("mailto")) {
int slashIndex = parentLoc.indexOf("/", 8);// index of the first "/"
diff --git a/src/main/java/com/amihaiemil/charles/LiveWebPage.java b/src/main/java/com/amihaiemil/charles/LiveWebPage.java
index 0f90c31..2e27e27 100644
--- a/src/main/java/com/amihaiemil/charles/LiveWebPage.java
+++ b/src/main/java/com/amihaiemil/charles/LiveWebPage.java
@@ -50,14 +50,14 @@ public final class LiveWebPage implements LivePage {
/**
* Visible anchors.
*/
- @FindBys(@FindBy(tagName=("a")))
+ @FindBys(@FindBy(tagName="a"))
@CacheLookup
private List anchors;
/**
* Text content from the page.
*/
- @FindBy(tagName=("body"))
+ @FindBy(tagName="body")
@CacheLookup
private WebElement body;
diff --git a/src/main/java/com/amihaiemil/charles/SitemapXmlCrawl.java b/src/main/java/com/amihaiemil/charles/SitemapXmlCrawl.java
index c42ced7..18a87a0 100644
--- a/src/main/java/com/amihaiemil/charles/SitemapXmlCrawl.java
+++ b/src/main/java/com/amihaiemil/charles/SitemapXmlCrawl.java
@@ -29,11 +29,9 @@
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
-
import org.openqa.selenium.WebDriver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
import com.amihaiemil.charles.sitemap.SitemapXml;
import com.amihaiemil.charles.sitemap.SitemapXmlLocation;
import com.amihaiemil.charles.sitemap.Url;
@@ -98,24 +96,24 @@ public void crawl() throws DataExportException {
List pages = new ArrayList();
LOG.info("Started crawling the sitemap.xml...");
for(Url url : this.urlset) {
- if(this.ignoredLinks.contains(url.getLoc())) {
+ if(this.ignoredPatterns().contains(url.getLoc())) {
continue;
}
LOG.info("Crawling page " + url.getLoc() + "... ");
- pages.add(new LiveWebPage(this.driver, url.getLoc()).snapshot());
+ pages.add(new LiveWebPage(this.driver(), url.getLoc()).snapshot());
LOG.info("Done crawling page " + url.getLoc() + "!");
- if(pages.size() == this.batchSize) {
- try {
- this.repo.export(pages);
+ if(pages.size() == this.batchSize()) {
+ try {
+ this.repo().export(pages);
pages.clear();
- } catch (DataExportException e) {
+ } catch (DataExportException e) {
e.printStackTrace();
}
- }
+ }
}
LOG.info("Finished crawling the sitemap.xml!");
- this.repo.export(pages);
- this.driver.quit();
+ this.repo().export(pages);
+ this.driver().quit();
}
}
diff --git a/src/main/java/com/amihaiemil/charles/SnapshotWebPage.java b/src/main/java/com/amihaiemil/charles/SnapshotWebPage.java
index d3b0318..dd9819f 100644
--- a/src/main/java/com/amihaiemil/charles/SnapshotWebPage.java
+++ b/src/main/java/com/amihaiemil/charles/SnapshotWebPage.java
@@ -14,15 +14,17 @@
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */package com.amihaiemil.charles;
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+package com.amihaiemil.charles;
import java.util.HashSet;
import java.util.Set;
@@ -30,14 +32,40 @@
/**
* Crawled web page.
* @author Mihai Andronache (amihaiemil@gmail.com)
+ * @version $Id$
+ * @since 1.0.0
+ * @checkstyle HiddenField (160 lines)
*/
public final class SnapshotWebPage implements WebPage {
+
+ /**
+ * Name.
+ */
private String name;
+
+ /**
+ * Url.
+ */
private String url;
+
+ /**
+ * Title.
+ */
private String title;
+
+ /**
+ * Text content.
+ */
private String textContent;
+
+ /**
+ * Set of links on the page.
+ */
private Set links;
+ /**
+ * Default ctor.
+ */
public SnapshotWebPage() {
this.url = "";
this.title = "";
@@ -45,52 +73,67 @@ public SnapshotWebPage() {
this.links = new HashSet();
}
+ /**
+ * Ctor.
+ * @param livePage LivePage to take a snapshot of
+ */
public SnapshotWebPage(LivePage livePage) {
this.name = livePage.getName();
this.url = livePage.getUrl();
this.title = livePage.getTitle();
this.textContent = livePage.getTextContent();
links = new HashSet();
- for(Link link : livePage.getLinks()) {
+ for(final Link link : livePage.getLinks()) {
links.add(link);
}
}
+ @Override
public String getName() {
return this.name;
}
- public void setName(String name) {
+ @Override
+ public void setName(final String name) {
this.name = name;
}
-
+
+ @Override
public String getUrl() {
return this.url;
}
- public void setUrl(String url) {
+
+ @Override
+ public void setUrl(final String url) {
this.url = url;
}
-
+
+ @Override
public String getTitle() {
return this.title;
}
- public void setTitle(String title) {
+ @Override
+ public void setTitle(final String title) {
this.title = title;
}
-
+
+ @Override
public String getTextContent() {
return textContent;
}
-
- public void setTextContent(String textContent) {
+
+ @Override
+ public void setTextContent(final String textContent) {
this.textContent = textContent;
}
+
+ @Override
public Set getLinks() {
return links;
}
-
- public void setLinks(Set links) {
+ @Override
+ public void setLinks(final Set links) {
this.links = new HashSet();
for(Link l : links) {
this.links.add(l);
@@ -101,24 +144,33 @@ public void setLinks(Set links) {
public int hashCode() {
final int prime = 31;
int result = 1;
- result = prime * result + ((url == null) ? 0 : url.hashCode());
+ int urlhash = 0;
+ if(this.url!= null) {
+ urlhash = this.url.hashCode();
+ }
+ result = prime * result + urlhash;
return result;
}
@Override
- public boolean equals(Object obj) {
- if (this == obj)
+ public boolean equals(final Object obj) {
+ if (this == obj) {
return true;
- if (obj == null)
+ }
+ if (obj == null) {
return false;
- if (getClass() != obj.getClass())
+ }
+ if (getClass() != obj.getClass()) {
return false;
+ }
SnapshotWebPage other = (SnapshotWebPage) obj;
if (url == null) {
- if (other.url != null)
+ if (other.url != null) {
return false;
- } else if (!url.equals(other.url))
+ }
+ } else if (!this.url.equals(other.url)) {
return false;
+ }
return true;
}
diff --git a/src/main/java/com/amihaiemil/charles/SwitchableCrawl.java b/src/main/java/com/amihaiemil/charles/SwitchableCrawl.java
index a6f181d..e59071c 100644
--- a/src/main/java/com/amihaiemil/charles/SwitchableCrawl.java
+++ b/src/main/java/com/amihaiemil/charles/SwitchableCrawl.java
@@ -14,15 +14,17 @@
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */package com.amihaiemil.charles;
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+package com.amihaiemil.charles;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -32,11 +34,16 @@
* @author Mihai Andronache (amihaiemil@gmail.com)
* @version $Id$
* @since 1.0.0
+ * @checkstyle IllegalCatch (100 lines)
*
*/
public final class SwitchableCrawl implements WebCrawl {
-
- private static final Logger LOG = LoggerFactory.getLogger(SwitchableCrawl.class);
+
+ /**
+ * Slf4J Logger.
+ */
+ private static final Logger LOG = LoggerFactory
+ .getLogger(SwitchableCrawl.class);
/**
* Initial crawl.
@@ -51,9 +58,10 @@ public final class SwitchableCrawl implements WebCrawl {
/**
* Ctor.
* @param initial WebCrawl performed.
- * @param failsafe WebCrawl performed in case the initial one fails with RuntimeException.
+ * @param failsafe WebCrawl performed in case the initial one
+ * fails with RuntimeException.
*/
- public SwitchableCrawl(WebCrawl initial, WebCrawl failsafe) {
+ public SwitchableCrawl(final WebCrawl initial, final WebCrawl failsafe) {
this.initial = initial;
this.failsafe = failsafe;
}
@@ -62,8 +70,10 @@ public SwitchableCrawl(WebCrawl initial, WebCrawl failsafe) {
public void crawl() throws DataExportException {
try {
this.initial.crawl();
- } catch (RuntimeException ex) {
- LOG.error("The initial crawl failed. Running the failsafe crawl...", ex);
+ } catch (final RuntimeException ex) {
+ LOG.error(
+ "The initial crawl failed. Running the failsafe crawl...", ex
+ );
this.failsafe.crawl();
}
}
diff --git a/src/main/java/com/amihaiemil/charles/WebCrawl.java b/src/main/java/com/amihaiemil/charles/WebCrawl.java
index 9bc305a..e672e23 100644
--- a/src/main/java/com/amihaiemil/charles/WebCrawl.java
+++ b/src/main/java/com/amihaiemil/charles/WebCrawl.java
@@ -14,14 +14,15 @@
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
*/
package com.amihaiemil.charles;
@@ -38,10 +39,10 @@
*/
public interface WebCrawl {
- /**
+ /**
* Crawl the website.
* @throws DataExportException If something goes wrong during processing of
* crawled pages.
- */
+ */
void crawl() throws DataExportException;
}
diff --git a/src/main/java/com/amihaiemil/charles/WebPage.java b/src/main/java/com/amihaiemil/charles/WebPage.java
index fcebadb..2f18831 100644
--- a/src/main/java/com/amihaiemil/charles/WebPage.java
+++ b/src/main/java/com/amihaiemil/charles/WebPage.java
@@ -14,15 +14,17 @@
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */package com.amihaiemil.charles;
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+package com.amihaiemil.charles;
import java.util.Set;
@@ -53,43 +55,43 @@ public interface WebPage {
/**
* Set url.
- * @param url
+ * @param url to be set.
*/
void setUrl(String url);
/**
* Get the title of the page.
- * @return String title.
+ * @return String title
*/
String getTitle();
/**
* Set the page title.
- * @param title
+ * @param title to be set on this web page.
*/
void setTitle(String title);
/**
* Get all the text content of the page.
- * @return
+ * @return String text content of the page
*/
String getTextContent();
/**
* Set the text content.
- * @param textContent
+ * @param textContent Content to be set
*/
void setTextContent(String textContent);
/**
* Fetch all the anchors (links) from the page.
- * @return
+ * @return Set of links
*/
Set getLinks();
/**
* Set the anchors on a page.
- * @param links
+ * @param links Set of links to be set
*/
void setLinks(Set links);
}
diff --git a/src/main/java/com/amihaiemil/charles/sitemap/UrlSet.java b/src/main/java/com/amihaiemil/charles/sitemap/UrlSet.java
index 534286a..04ed785 100644
--- a/src/main/java/com/amihaiemil/charles/sitemap/UrlSet.java
+++ b/src/main/java/com/amihaiemil/charles/sitemap/UrlSet.java
@@ -14,14 +14,15 @@
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
*/
package com.amihaiemil.charles.sitemap;
@@ -32,22 +33,42 @@
/**
* Set of urls from sitemap.xml.
* @author Mihai Andronache (amihaiemil@gmail.com)
+ * @version $Id$
+ * @since 1.0.0
+ * @checkstyle HiddenField (100 lines)
*/
@XmlAccessorType(XmlAccessType.FIELD)
-@XmlRootElement(name = "urlset", namespace = "http://www.sitemaps.org/schemas/sitemap/0.9")
+@XmlRootElement(
+ name = "urlset", namespace = "http://www.sitemaps.org/schemas/sitemap/0.9"
+)
public final class UrlSet {
+ /**
+ * Ctor.
+ */
UrlSet() {
this.urls = new HashSet();
}
+ /**
+ * Set of urls.
+ */
@XmlElement(name="url")
private Set urls;
+ /**
+ * Fetch the urls of this set.
+ * @return Set of urls
+ */
public Set getUrls() {
- return urls;
+ return this.urls;
}
- public void setUrls(Set urls) {
+
+ /**
+ * Set the urls of this set.
+ * @param urls Urls to be set
+ */
+ public void setUrls(final Set urls) {
this.urls = urls;
}
}
diff --git a/src/test/java/com/amihaiemil/charles/GraphCrawlITCase.java b/src/test/java/com/amihaiemil/charles/GraphCrawlITCase.java
index 2fd5fdc..83e7056 100644
--- a/src/test/java/com/amihaiemil/charles/GraphCrawlITCase.java
+++ b/src/test/java/com/amihaiemil/charles/GraphCrawlITCase.java
@@ -14,14 +14,15 @@
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.amihaiemil.charles;