From 9d8433846b79c9adf5e5f7e610f004d23d93a79f Mon Sep 17 00:00:00 2001 From: amihaiemil Date: Tue, 10 Jan 2017 17:23:15 +0200 Subject: [PATCH] checkstyle fixes --- checkstyle.xml | 14 --- .../amihaiemil/charles/AbstractWebCrawl.java | 53 +++++++-- .../charles/DataExportException.java | 3 +- .../com/amihaiemil/charles/GraphCrawl.java | 16 +-- .../charles/InMemoryRepository.java | 12 +- .../java/com/amihaiemil/charles/Link.java | 17 +-- .../com/amihaiemil/charles/LiveWebPage.java | 4 +- .../amihaiemil/charles/SitemapXmlCrawl.java | 20 ++-- .../amihaiemil/charles/SnapshotWebPage.java | 106 +++++++++++++----- .../amihaiemil/charles/SwitchableCrawl.java | 40 ++++--- .../java/com/amihaiemil/charles/WebCrawl.java | 21 ++-- .../java/com/amihaiemil/charles/WebPage.java | 34 +++--- .../amihaiemil/charles/sitemap/UrlSet.java | 43 +++++-- .../amihaiemil/charles/GraphCrawlITCase.java | 17 +-- 14 files changed, 256 insertions(+), 144 deletions(-) diff --git a/checkstyle.xml b/checkstyle.xml index 17a5439..0017adf 100644 --- a/checkstyle.xml +++ b/checkstyle.xml @@ -82,16 +82,6 @@ - - - - - - - - - - @@ -167,7 +157,6 @@ - @@ -267,9 +256,6 @@ - - - diff --git a/src/main/java/com/amihaiemil/charles/AbstractWebCrawl.java b/src/main/java/com/amihaiemil/charles/AbstractWebCrawl.java index 27640c1..f1aee28 100644 --- a/src/main/java/com/amihaiemil/charles/AbstractWebCrawl.java +++ b/src/main/java/com/amihaiemil/charles/AbstractWebCrawl.java @@ -23,12 +23,14 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */package com.amihaiemil.charles; + */ +package com.amihaiemil.charles; import org.openqa.selenium.WebDriver; /** - * An abstract webcrawl - contains the webdriver and other common data of each crawl. + * An abstract webcrawl - contains the webdriver and other common data of each + * crawl. 
* @author Mihai Andronache (amihaiemil@gmail.com) * @version $Id$ * @since 1.0.0 @@ -39,23 +41,23 @@ public abstract class AbstractWebCrawl implements WebCrawl { /** * WebDriver. */ - protected WebDriver driver; + private final WebDriver driver; /** * Ignored pages patterns. */ - protected IgnoredPatterns ignoredLinks; + private final IgnoredPatterns ignoredLinks; /** * Repo to export the pages to. */ - protected Repository repo; + private final Repository repo; /** * Pages are crawled and exported in batches in order to avoid flooding * the memory if there are many pages on a website. Default value is 100. */ - protected int batchSize; + private final int batchSize; /** * Ctor. @@ -63,13 +65,50 @@ public abstract class AbstractWebCrawl implements WebCrawl { * @param igp Ignored patterns. * @param repo Repository to export the crawled pages into. * @param batch Size of a crawl batch. + * @checkstyle ParameterNumber (6 lines) */ - public AbstractWebCrawl(WebDriver webd, IgnoredPatterns igp, Repository repo, int batch) { + public AbstractWebCrawl( + final WebDriver webd, final IgnoredPatterns igp, + final Repository repo, final int batch + ) { this.driver = webd; this.ignoredLinks = igp; this.repo = repo; this.batchSize = batch; } + @Override public abstract void crawl() throws DataExportException; + + /** + * Fetch the used WebDriver. + * @return driver Webdriver of this crawl + */ + public final WebDriver driver() { + return this.driver; + } + + /** + * Fetch the used Repository. + * @return repo Repository where the pages are sent + */ + public final Repository repo() { + return this.repo; + } + + /** + * Fetch the ignored links patterns. + * @return ignoredLinks IgnoredPatterns of this crawl + */ + public final IgnoredPatterns ignoredPatterns() { + return this.ignoredLinks; + } + + /** + * Batch size. How many pages will be crawled at once? + * @return Integer batch size. 
+ */ + public final int batchSize() { + return this.batchSize; + } } diff --git a/src/main/java/com/amihaiemil/charles/DataExportException.java b/src/main/java/com/amihaiemil/charles/DataExportException.java index 7ac3661..3156fdf 100644 --- a/src/main/java/com/amihaiemil/charles/DataExportException.java +++ b/src/main/java/com/amihaiemil/charles/DataExportException.java @@ -23,7 +23,8 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */package com.amihaiemil.charles; + */ +package com.amihaiemil.charles; /** * Exception thrown if a page cannot be exported properly. diff --git a/src/main/java/com/amihaiemil/charles/GraphCrawl.java b/src/main/java/com/amihaiemil/charles/GraphCrawl.java index e24670c..1ec41ba 100644 --- a/src/main/java/com/amihaiemil/charles/GraphCrawl.java +++ b/src/main/java/com/amihaiemil/charles/GraphCrawl.java @@ -98,9 +98,9 @@ public GraphCrawl( @Override public void crawl() throws DataExportException { - if(!this.ignoredLinks.contains(this.index.getHref())) { + if(!this.ignoredPatterns().contains(this.index.getHref())) { List pages = new ArrayList(); - WebPage indexSnapshot = new LiveWebPage(this.driver, this.index).snapshot(); + WebPage indexSnapshot = new LiveWebPage(this.driver(), this.index).snapshot(); pages.add(indexSnapshot); Set crawledLinks = new HashSet(); @@ -114,13 +114,13 @@ public void crawl() throws DataExportException { if(toCrawl.size() > 0) { Link link = toCrawl.remove(0); while(toCrawl.size() > 0) { - if(this.ignoredLinks.contains(link.getHref())) { + if(this.ignoredPatterns().contains(link.getHref())) { link = toCrawl.remove(0); continue; } boolean notCrawledAlready = crawledLinks.add(link); if(notCrawledAlready) { - WebPage snapshotCrawled = new LiveWebPage(this.driver, link).snapshot(); + WebPage snapshotCrawled = new LiveWebPage(this.driver(), link).snapshot(); 
pages.add(snapshotCrawled); this.checkBatchSize(pages); toCrawl.addAll(snapshotCrawled.getLinks()); @@ -128,8 +128,8 @@ public void crawl() throws DataExportException { link = toCrawl.remove(0); } } - this.repo.export(pages); - this.driver.quit(); + this.repo().export(pages); + this.driver().quit(); } } @@ -140,8 +140,8 @@ public void crawl() throws DataExportException { * @throws DataExportException If something goes wrong during processing of crawled pages. */ private void checkBatchSize(List pages) throws DataExportException { - if(pages.size() == this.batchSize) { - this.repo.export(pages); + if(pages.size() == this.batchSize()) { + this.repo().export(pages); pages.clear(); } } diff --git a/src/main/java/com/amihaiemil/charles/InMemoryRepository.java b/src/main/java/com/amihaiemil/charles/InMemoryRepository.java index d639cdf..eb614e9 100644 --- a/src/main/java/com/amihaiemil/charles/InMemoryRepository.java +++ b/src/main/java/com/amihaiemil/charles/InMemoryRepository.java @@ -39,25 +39,25 @@ * @author Mihai Andronache (amihaiemil@gmail.com) * */ -public class InMemoryRepository implements Repository { +public final class InMemoryRepository implements Repository { /** * Holds all the crawled pages. */ - private List pages = new ArrayList(); + private final List pgs = new ArrayList(); /** * Get all the pages from this Repository. * @return List of pages. 
*/ public List getCrawledPages() { - return this.pages; + return this.pgs; } @Override - public void export(List pages) throws DataExportException { - for(WebPage page : pages) { - this.pages.add(page); + public void export(final List pages) throws DataExportException { + for(final WebPage page : pages) { + this.pgs.add(page); } } } diff --git a/src/main/java/com/amihaiemil/charles/Link.java b/src/main/java/com/amihaiemil/charles/Link.java index 0913b23..04ecec1 100644 --- a/src/main/java/com/amihaiemil/charles/Link.java +++ b/src/main/java/com/amihaiemil/charles/Link.java @@ -46,7 +46,7 @@ public Link() { this("", ""); } - public Link(String text, String href) { + public Link(final String text, final String href) { this.text = text; this.href = href; } @@ -55,7 +55,7 @@ public String getText() { return text; } - public void setText(String text) { + public void setText(final String text) { this.text = text; } @@ -63,7 +63,7 @@ public String getHref() { return href; } - public void setHref(String href) { + public void setHref(final String href) { this.href = href; } @@ -75,8 +75,9 @@ public int hashCode() { result = prime * result + 0; } else { if (this.href.contains("#")) { - result = new Link("", href.substring(0, href.indexOf("#"))) - .hashCode(); + result = new Link( + "", this.href.substring(0, this.href.indexOf("#")) + ).hashCode(); } else { if (this.href.endsWith("/")) { result = prime @@ -103,7 +104,7 @@ public boolean equals(Object obj) { return false; } Link other = (Link) obj; - if (href == null) { + if (this.href == null) { if (other.href != null) return false; } else { @@ -121,7 +122,7 @@ public boolean equals(Object obj) { } if (this.href.endsWith("/") && other.href.endsWith("/")) { - return this.href.substring(0, href.length() - 1).equals( + return this.href.substring(0, this.href.length() - 1).equals( other.href.substring(0, other.href.length() - 1)); } else if (this.href.endsWith("/")) { return this.href.substring(0, href.length() - 1).equals( @@ 
-144,7 +145,7 @@ public String toString() { * * @return ture if valid, false otherwise. */ - public boolean valid(String parentLoc) { + public boolean valid(final String parentLoc) { if (this.href != null && !this.href.startsWith("mailto")) { int slashIndex = parentLoc.indexOf("/", 8);// index of the first "/" diff --git a/src/main/java/com/amihaiemil/charles/LiveWebPage.java b/src/main/java/com/amihaiemil/charles/LiveWebPage.java index 0f90c31..2e27e27 100644 --- a/src/main/java/com/amihaiemil/charles/LiveWebPage.java +++ b/src/main/java/com/amihaiemil/charles/LiveWebPage.java @@ -50,14 +50,14 @@ public final class LiveWebPage implements LivePage { /** * Visible anchors. */ - @FindBys(@FindBy(tagName=("a"))) + @FindBys(@FindBy(tagName="a")) @CacheLookup private List anchors; /** * Text content from the page. */ - @FindBy(tagName=("body")) + @FindBy(tagName="body") @CacheLookup private WebElement body; diff --git a/src/main/java/com/amihaiemil/charles/SitemapXmlCrawl.java b/src/main/java/com/amihaiemil/charles/SitemapXmlCrawl.java index c42ced7..18a87a0 100644 --- a/src/main/java/com/amihaiemil/charles/SitemapXmlCrawl.java +++ b/src/main/java/com/amihaiemil/charles/SitemapXmlCrawl.java @@ -29,11 +29,9 @@ import java.util.ArrayList; import java.util.List; import java.util.Set; - import org.openqa.selenium.WebDriver; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import com.amihaiemil.charles.sitemap.SitemapXml; import com.amihaiemil.charles.sitemap.SitemapXmlLocation; import com.amihaiemil.charles.sitemap.Url; @@ -98,24 +96,24 @@ public void crawl() throws DataExportException { List pages = new ArrayList(); LOG.info("Started crawling the sitemap.xml..."); for(Url url : this.urlset) { - if(this.ignoredLinks.contains(url.getLoc())) { + if(this.ignoredPatterns().contains(url.getLoc())) { continue; } LOG.info("Crawling page " + url.getLoc() + "... 
"); - pages.add(new LiveWebPage(this.driver, url.getLoc()).snapshot()); + pages.add(new LiveWebPage(this.driver(), url.getLoc()).snapshot()); LOG.info("Done crawling page " + url.getLoc() + "!"); - if(pages.size() == this.batchSize) { - try { - this.repo.export(pages); + if(pages.size() == this.batchSize()) { + try { + this.repo().export(pages); pages.clear(); - } catch (DataExportException e) { + } catch (DataExportException e) { e.printStackTrace(); } - } + } } LOG.info("Finished crawling the sitemap.xml!"); - this.repo.export(pages); - this.driver.quit(); + this.repo().export(pages); + this.driver().quit(); } } diff --git a/src/main/java/com/amihaiemil/charles/SnapshotWebPage.java b/src/main/java/com/amihaiemil/charles/SnapshotWebPage.java index d3b0318..dd9819f 100644 --- a/src/main/java/com/amihaiemil/charles/SnapshotWebPage.java +++ b/src/main/java/com/amihaiemil/charles/SnapshotWebPage.java @@ -14,15 +14,17 @@ * this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */package com.amihaiemil.charles; + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +package com.amihaiemil.charles; import java.util.HashSet; import java.util.Set; @@ -30,14 +32,40 @@ /** * Crawled web page. * @author Mihai Andronache (amihaiemil@gmail.com) + * @version $Id$ + * @since 1.0.0 + * @checkstyle HiddenField (160 lines) */ public final class SnapshotWebPage implements WebPage { + + /** + * Name. + */ private String name; + + /** + * Url. + */ private String url; + + /** + * Title. + */ private String title; + + /** + * Text content. + */ private String textContent; + + /** + * Set of links on the page. + */ private Set links; + /** + * Default ctor. + */ public SnapshotWebPage() { this.url = ""; this.title = ""; @@ -45,52 +73,67 @@ public SnapshotWebPage() { this.links = new HashSet(); } + /** + * Ctor. 
+ * @param livePage LivePage to take a snapshot of + */ public SnapshotWebPage(LivePage livePage) { this.name = livePage.getName(); this.url = livePage.getUrl(); this.title = livePage.getTitle(); this.textContent = livePage.getTextContent(); links = new HashSet(); - for(Link link : livePage.getLinks()) { + for(final Link link : livePage.getLinks()) { links.add(link); } } + @Override public String getName() { return this.name; } - public void setName(String name) { + @Override + public void setName(final String name) { this.name = name; } - + + @Override public String getUrl() { return this.url; } - public void setUrl(String url) { + + @Override + public void setUrl(final String url) { this.url = url; } - + + @Override public String getTitle() { return this.title; } - public void setTitle(String title) { + @Override + public void setTitle(final String title) { this.title = title; } - + + @Override public String getTextContent() { return textContent; } - - public void setTextContent(String textContent) { + + @Override + public void setTextContent(final String textContent) { this.textContent = textContent; } + + @Override public Set getLinks() { return links; } - - public void setLinks(Set links) { + @Override + public void setLinks(final Set links) { this.links = new HashSet(); for(Link l : links) { this.links.add(l); @@ -101,24 +144,33 @@ public void setLinks(Set links) { public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + ((url == null) ? 
0 : url.hashCode()); + int urlhash = 0; + if(this.url!= null) { + urlhash = this.url.hashCode(); + } + result = prime * result + urlhash; return result; } @Override - public boolean equals(Object obj) { - if (this == obj) + public boolean equals(final Object obj) { + if (this == obj) { return true; - if (obj == null) + } + if (obj == null) { return false; - if (getClass() != obj.getClass()) + } + if (getClass() != obj.getClass()) { return false; + } SnapshotWebPage other = (SnapshotWebPage) obj; if (url == null) { - if (other.url != null) + if (other.url != null) { return false; - } else if (!url.equals(other.url)) + } + } else if (!this.url.equals(other.url)) { return false; + } return true; } diff --git a/src/main/java/com/amihaiemil/charles/SwitchableCrawl.java b/src/main/java/com/amihaiemil/charles/SwitchableCrawl.java index a6f181d..e59071c 100644 --- a/src/main/java/com/amihaiemil/charles/SwitchableCrawl.java +++ b/src/main/java/com/amihaiemil/charles/SwitchableCrawl.java @@ -14,15 +14,17 @@ * this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */package com.amihaiemil.charles; + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +package com.amihaiemil.charles; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -32,11 +34,16 @@ * @author Mihai Andronache (amihaiemil@gmail.com) * @version $Id$ * @since 1.0.0 + * @checkstyle IllegalCatch (100 lines) * */ public final class SwitchableCrawl implements WebCrawl { - - private static final Logger LOG = LoggerFactory.getLogger(SwitchableCrawl.class); + + /** + * Slf4J Logger. + */ + private static final Logger LOG = LoggerFactory + .getLogger(SwitchableCrawl.class); /** * Initial crawl. @@ -51,9 +58,10 @@ public final class SwitchableCrawl implements WebCrawl { /** * Ctor. * @param initial WebCrawl performed. - * @param failsafe WebCrawl performed in case the initial one fails with RuntimeException. + * @param failsafe WebCrawl performed in case the initial one + * fails with RuntimeException. */ - public SwitchableCrawl(WebCrawl initial, WebCrawl failsafe) { + public SwitchableCrawl(final WebCrawl initial, final WebCrawl failsafe) { this.initial = initial; this.failsafe = failsafe; } @@ -62,8 +70,10 @@ public SwitchableCrawl(WebCrawl initial, WebCrawl failsafe) { public void crawl() throws DataExportException { try { this.initial.crawl(); - } catch (RuntimeException ex) { - LOG.error("The initial crawl failed. Running the failsafe crawl...", ex); + } catch (final RuntimeException ex) { + LOG.error( + "The initial crawl failed. 
Running the failsafe crawl...", ex + ); this.failsafe.crawl(); } } diff --git a/src/main/java/com/amihaiemil/charles/WebCrawl.java b/src/main/java/com/amihaiemil/charles/WebCrawl.java index 9bc305a..e672e23 100644 --- a/src/main/java/com/amihaiemil/charles/WebCrawl.java +++ b/src/main/java/com/amihaiemil/charles/WebCrawl.java @@ -14,14 +14,15 @@ * this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */ package com.amihaiemil.charles; @@ -38,10 +39,10 @@ */ public interface WebCrawl { - /** + /** * Crawl the website. 
* @throws DataExportException If something goes wrong during processing of * crawled pages. - */ + */ void crawl() throws DataExportException; } diff --git a/src/main/java/com/amihaiemil/charles/WebPage.java b/src/main/java/com/amihaiemil/charles/WebPage.java index fcebadb..2f18831 100644 --- a/src/main/java/com/amihaiemil/charles/WebPage.java +++ b/src/main/java/com/amihaiemil/charles/WebPage.java @@ -14,15 +14,17 @@ * this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */package com.amihaiemil.charles; + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ +package com.amihaiemil.charles; import java.util.Set; @@ -53,43 +55,43 @@ public interface WebPage { /** * Set url. - * @param url + * @param url to be set. */ void setUrl(String url); /** * Get the title of the page. - * @return String title. + * @return String title */ String getTitle(); /** * Set the page title. - * @param title + * @param title to be set on this web page. */ void setTitle(String title); /** * Get all the text content of the page. - * @return + * @return String text content of the page */ String getTextContent(); /** * Set the text content. - * @param textContent + * @param textContent Content to be set */ void setTextContent(String textContent); /** * Fetch all the anchors (links) from the page. - * @return + * @return Set of links */ Set getLinks(); /** * Set the anchors on a page. - * @param links + * @param links Set of links to be set */ void setLinks(Set links); } diff --git a/src/main/java/com/amihaiemil/charles/sitemap/UrlSet.java b/src/main/java/com/amihaiemil/charles/sitemap/UrlSet.java index 534286a..04ed785 100644 --- a/src/main/java/com/amihaiemil/charles/sitemap/UrlSet.java +++ b/src/main/java/com/amihaiemil/charles/sitemap/UrlSet.java @@ -14,14 +14,15 @@ * this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. */ package com.amihaiemil.charles.sitemap; @@ -32,22 +33,42 @@ /** * Set of urls from sitemap.xml. * @author Mihai Andronache (amihaiemil@gmail.com) + * @version $Id$ + * @since 1.0.0 + * @checkstyle HiddenField (100 lines) */ @XmlAccessorType(XmlAccessType.FIELD) -@XmlRootElement(name = "urlset", namespace = "http://www.sitemaps.org/schemas/sitemap/0.9") +@XmlRootElement( + name = "urlset", namespace = "http://www.sitemaps.org/schemas/sitemap/0.9" +) public final class UrlSet { + /** + * Ctor. + */ UrlSet() { this.urls = new HashSet(); } + /** + * Set of urls. + */ @XmlElement(name="url") private Set urls; + /** + * Fetch them. + * @return Set of urls + */ public Set getUrls() { - return urls; + return this.urls; } - public void setUrls(Set urls) { + + /** + * Set them. 
+ * @param urls Urls to be set + */ + public void setUrls(final Set urls) { this.urls = urls; } } diff --git a/src/test/java/com/amihaiemil/charles/GraphCrawlITCase.java b/src/test/java/com/amihaiemil/charles/GraphCrawlITCase.java index 2fd5fdc..83e7056 100644 --- a/src/test/java/com/amihaiemil/charles/GraphCrawlITCase.java +++ b/src/test/java/com/amihaiemil/charles/GraphCrawlITCase.java @@ -14,14 +14,15 @@ * this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR - * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER - * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. */ package com.amihaiemil.charles;