Skip to content

Commit

Permalink
Merge branch 'master' of 10.68.203.26:jhy/jsoup
Browse files Browse the repository at this point in the history
  • Loading branch information
jhy committed Feb 7, 2010
2 parents 2a00564 + f20e2ba commit fafbfc7
Show file tree
Hide file tree
Showing 16 changed files with 382 additions and 26 deletions.
11 changes: 9 additions & 2 deletions CHANGES
@@ -1,14 +1,21 @@
jsoup changelog

*** Release 0.2.0 (pending)
*** Release 0.2.2 (2010-Feb-07)
* jsoup packages are now available in the Maven central repository.

* New feature: supports Element#addClass, removeClass, toggleClass;
also collection class methods on Elements.
* New feature: supports Element#wrap(html) and Elements#wrap(html).
* New selector syntax: supports E + F adjacent sibling selector
* New selector syntax: supports E ~ F preceding sibling selector
* New: supports Element#elementSiblingIndex()

* Improved document normalisation.
* Improved HTML string output format (pretty-print)

* Fixed absolute URL resolution issue when a base tag has no href.

*** Release 0.1.2 (2010-Jan-02)
*** Release 0.1.2 (2010-Feb-02)
* Fixed unrecognised tag handler to be more permissive
<http://github.com/jhy/jsoup/issues/issue/1>

Expand Down
56 changes: 55 additions & 1 deletion pom.xml
Expand Up @@ -5,7 +5,7 @@

<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>0.1.3-SNAPSHOT</version>
<version>0.2.3-SNAPSHOT</version>
<description>jsoup HTML parser</description>
<url>http://jsoup.org/</url>
<inceptionYear>2009</inceptionYear>
Expand Down Expand Up @@ -76,6 +76,48 @@
</plugin>
</plugins>
</build>

<distributionManagement>
<snapshotRepository>
<id>sonatype-nexus-snapshots</id>
<name>Sonatype Nexus Snapshots</name>
<url>http://oss.sonatype.org/content/repositories/snapshots</url>
</snapshotRepository>
<repository>
<id>sonatype-nexus-staging</id>
<name>Nexus Release Repository</name>
<url>http://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
</repository>
</distributionManagement>

<profiles>
<profile>
<id>release-sign-artifacts</id>
<activation>
<property>
<name>performRelease</name>
<value>true</value>
</property>
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-gpg-plugin</artifactId>
<executions>
<execution>
<id>sign-artifacts</id>
<phase>verify</phase>
<goals>
<goal>sign</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</profile>
</profiles>

<dependencies>

Expand Down Expand Up @@ -103,4 +145,16 @@
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>

<developers>
<developer>
<id>jhy</id>
<name>Jonathan Hedley</name>
<email>jonathan@hedley.net</email>
<roles>
<role>Lead Developer</role>
</roles>
<timezone>+11</timezone>
</developer>
</developers>

</project>
1 change: 1 addition & 0 deletions src/main/java/org/jsoup/nodes/Comment.java
Expand Up @@ -30,6 +30,7 @@ public String getData() {
}

// Writes this comment as an HTML comment, starting on its own indented line.
void outerHtml(StringBuilder accum) {
    indent(accum);
    accum.append("<!--").append(getData()).append("-->");
}

Expand Down
41 changes: 41 additions & 0 deletions src/main/java/org/jsoup/nodes/Document.java
Expand Up @@ -3,6 +3,9 @@
import org.apache.commons.lang.Validate;
import org.jsoup.parser.Tag;

import java.util.List;
import java.util.ArrayList;

/**
A HTML Document.
Expand Down Expand Up @@ -84,6 +87,44 @@ public Element createElement(String tagName) {
return new Element(Tag.valueOf(tagName), this.baseUri());
}

/**
Normalise this document's structure. The parser calls this once parsing has finished, so it rarely needs to be
invoked directly. Makes sure an {@code html} element exists and that it has a head and a body, then relocates any
non-blank text sitting directly under the document, the {@code html} element, or the head into the body.
@return this document, for chaining
*/
public Document normalise() {
    if (select("html").isEmpty()) {
        appendElement("html");
    }

    // Looked up only after the check above, so an html element is expected to be present here.
    Element htmlEl = select("html").first();
    if (head() == null) {
        htmlEl.appendElement("head");
    }
    if (body() == null) {
        htmlEl.appendElement("body");
    }

    // Sweep stray text from the outside in: document first, then html, then head.
    normalise(this);
    normalise(htmlEl);
    normalise(head());

    return this;
}

// Moves the non-blank text nodes that are direct children of the given element to the end of the body.
// Only the direct children are examined (no recursion). The moved text ends up in the body, although its
// resulting order there is not especially intuitive.
private void normalise(Element element) {
    // Collect first, move afterwards, so the child list is not modified while it is being iterated.
    List<Node> misplaced = new ArrayList<Node>();
    for (Node child : element.childNodes) {
        if (child instanceof TextNode && !((TextNode) child).isBlank()) {
            misplaced.add(child);
        }
    }

    for (Node stray : misplaced) {
        element.removeChild(stray);
        // a single-space text node goes in ahead of each relocated node, as a separator
        body().appendChild(new TextNode(" ", ""));
        body().appendChild(stray);
    }
}

@Override
public String outerHtml() {
return super.html(); // no outer wrapper tag
Expand Down
92 changes: 85 additions & 7 deletions src/main/java/org/jsoup/nodes/Element.java
@@ -1,6 +1,7 @@
package org.jsoup.nodes;

import org.apache.commons.lang.Validate;
import org.apache.commons.lang.StringUtils;
import org.jsoup.parser.Parser;
import org.jsoup.parser.Tag;
import org.jsoup.select.Collector;
Expand Down Expand Up @@ -571,6 +572,25 @@ public Element text(String text) {
return this;
}

/**
Check whether this element, or any element nested within it, holds text that is more than whitespace.
@return true if non-blank text is found in this element or its descendants; false otherwise.
*/
public boolean hasText() {
    for (Node child : childNodes) {
        if (child instanceof TextNode) {
            // a blank text node is simply skipped; keep scanning the remaining children
            if (!((TextNode) child).isBlank()) {
                return true;
            }
        } else if (child instanceof Element && ((Element) child).hasText()) {
            // depth-first: the first descendant with real text settles the answer
            return true;
        }
    }
    return false;
}

/**
* Get the combined data of this element. Data is e.g. the inside of a {@code script} tag.
* @return the data, or empty string if none
Expand Down Expand Up @@ -602,17 +622,29 @@ public String className() {

/**
* Get all of the element's class names. E.g. on element {@code <div class="header gray">},
* returns a set of two elements {@code "header", "gray"}.
* returns a set of two elements {@code "header", "gray"}. Note that modifications to this set are not pushed to
* the backing {@code class} attribute; use the {@link #classNames(java.util.Set)} method to persist them.
* @return set of classnames, empty if no class attribute
*/
public Set<String> classNames() {
    if (classNames == null) {
        // Built lazily on first use and cached. LinkedHashSet keeps the names in the order they were split.
        String[] names = className().split("\\s+");
        classNames = new LinkedHashSet<String>(Arrays.asList(names));
        // Splitting an empty string yields one empty token, and a value with leading whitespace yields a
        // leading empty token. Drop it so that "no classes" really is an empty set and a later
        // join does not write a stray leading space into the attribute.
        classNames.remove("");
    }
    return classNames;
}

/**
Set the element's {@code class} attribute to the supplied class names, joined with single spaces.
The element's cached class-name set is refreshed from a copy of the supplied set, so later calls to
{@link #classNames()} and {@link #hasClass(String)} see the new names.
@param classNames set of classes; must not be null
@return this element, for chaining
*/
public Element classNames(Set<String> classNames) {
    Validate.notNull(classNames);
    attributes.put("class", StringUtils.join(classNames, " "));
    // Keep the lazily built cache in step with the attribute. A copy is stored so that later changes the
    // caller makes to their own set do not silently alter this element's view of its classes.
    this.classNames = new LinkedHashSet<String>(classNames);
    return this;
}

/**
* Tests if this element has a class.
* @param className name of class to check for
Expand All @@ -622,8 +654,57 @@ public boolean hasClass(String className) {
return classNames().contains(className);
}

/**
Append a class name to this element's {@code class} attribute. The element's class names are held in a set,
so adding a name that is already there leaves the set unchanged.
@param className class name to add; must not be null
@return this element
*/
public Element addClass(String className) {
    Validate.notNull(className);

    Set<String> updated = classNames();
    updated.add(className);
    // write the modified set back to the class attribute
    classNames(updated);
    return this;
}

/**
Drop a class name from this element's {@code class} attribute. The element's class names are held in a set,
so removing a name that is not there leaves the set unchanged.
@param className class name to remove; must not be null
@return this element
*/
public Element removeClass(String className) {
    Validate.notNull(className);

    Set<String> updated = classNames();
    updated.remove(className);
    // write the modified set back to the class attribute
    classNames(updated);
    return this;
}

/**
Flip a class name on this element's {@code class} attribute: a name that is present is removed, and a name
that is absent is added.
@param className class name to toggle; must not be null
@return this element
*/
public Element toggleClass(String className) {
    Validate.notNull(className);

    Set<String> updated = classNames();
    // Set.remove reports whether the name was present; only add it when nothing was removed.
    if (!updated.remove(className)) {
        updated.add(className);
    }
    classNames(updated);
    return this;
}

void outerHtml(StringBuilder accum) {

if (isBlock() || (parent() != null && parent().tag().canContainBlock() && siblingIndex() == 0))
indent(accum);
accum
.append("<")
.append(tagName())
Expand All @@ -633,12 +714,9 @@ void outerHtml(StringBuilder accum) {
accum.append(" />");
} else {
accum.append(">");
if (tag.canContainBlock())
accum.append("\n");
html(accum);
if (tag.canContainBlock()) indent(accum);
accum.append("</").append(tagName()).append(">");
if (tag.isBlock())
accum.append("\n");
}
}

Expand Down
22 changes: 22 additions & 0 deletions src/main/java/org/jsoup/nodes/Node.java
Expand Up @@ -2,6 +2,7 @@

import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.lang.Validate;
import org.apache.commons.lang.StringUtils;

import java.net.MalformedURLException;
import java.net.URL;
Expand Down Expand Up @@ -222,6 +223,13 @@ protected void addChild(Node in) {
childNodes.add(in);
in.parentNode = this;
}

// Number of ancestors above this node: 0 for a node without a parent, otherwise the parent's depth plus one.
protected int nodeDepth() {
    return parentNode == null ? 0 : parentNode.nodeDepth() + 1;
}

/**
Retrieves this node's sibling nodes. Effectively, {@link #childNodes() node.parent.childNodes()}.
Expand Down Expand Up @@ -259,6 +267,16 @@ public Node previousSibling() {
return null;
}

/**
 * Find where this node sits among its parent's child nodes. The first child yields 0. Requires the node
 * to have a parent, as the parent's child list is read directly.
 * @return position in node sibling list
 * @see org.jsoup.nodes.Element#elementSiblingIndex()
 */
public Integer siblingIndex() {
    List<Node> siblings = parent().childNodes;
    return indexInList(this, siblings);
}

protected static <N extends Node> Integer indexInList(N search, List<N> nodes) {
Validate.notNull(search);
Validate.notNull(nodes);
Expand Down Expand Up @@ -291,6 +309,10 @@ public String toString() {
return outerHtml();
}

/**
 * Start a new output line and pad it according to this node's nesting depth: two spaces for every level
 * below the first, so nodes at depth 0 or 1 get no padding.
 * @param accum builder receiving the newline and the padding
 */
protected void indent(StringBuilder accum) {
    // The parentheses matter: written as "nodeDepth() -1 * 2", the multiplication binds first and the
    // padding becomes depth - 2 rather than two spaces per level.
    int width = Math.max(0, (nodeDepth() - 1) * 2);
    accum.append("\n").append(StringUtils.leftPad("", width));
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/org/jsoup/nodes/TextNode.java
Expand Up @@ -2,6 +2,7 @@

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.Validate;
import org.apache.commons.lang.StringUtils;

/**
A text node.
Expand Down Expand Up @@ -34,12 +35,22 @@ public String getWholeText() {
return attributes.get(TEXT_KEY);
}

/**
 Check whether this text node carries no visible content: its text is empty or consists solely of
 whitespace (newlines included).
 @return true if this text node is empty or only whitespace, false if it contains any text content.
 */
public boolean isBlank() {
    String collapsed = normaliseWhitespace(getWholeText());
    return StringUtils.isBlank(collapsed);
}

// Writes this text node's HTML-escaped text. Whitespace is normalised unless the parent element preserves it.
void outerHtml(StringBuilder accum) {
    String escaped = StringEscapeUtils.escapeHtml(getWholeText());
    boolean collapse = parent() instanceof Element && !((Element) parent()).preserveWhitespace();
    String html = collapse ? normaliseWhitespace(escaped) : escaped;

    // Non-blank text that is the first child of an element able to contain blocks starts on an indented line.
    if (!isBlank() && parentNode instanceof Element) {
        Element owner = (Element) parentNode;
        if (owner.tag().canContainBlock() && siblingIndex() == 0) {
            indent(accum);
        }
    }
    accum.append(html);
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jsoup/parser/Parser.java
Expand Up @@ -78,7 +78,7 @@ private Document parse() {
parseTextNode();
}
}
return doc;
return doc.normalise();
}

private void parseComment() {
Expand Down

0 comments on commit fafbfc7

Please sign in to comment.